[GPU] Model caching unit tests (#15413)

* gpu model caching unit tests

* added serialization unit tests

* added save and load for quantize primitive_inst

* reduced the range of inputs for Gemm tests

* updated the copyright year
Eddy Kim 2023-02-22 14:53:43 +09:00 committed by GitHub
parent d464f38788
commit a6ff809ad7
121 changed files with 8511 additions and 6665 deletions

View File

@ -43,6 +43,9 @@ public:
void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
void* getKernlImplParams() const { return _impl_params; }
std::streampos tellg() { return stream.tellg(); }
void seekg(std::streampos pos) { stream.seekg(pos); }
private:
std::istream& stream;
void* _impl_params;

View File

@ -0,0 +1,31 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <type_traits>
#include "buffer.hpp"
namespace cldnn {
struct input_info;
template <typename BufferType>
class Serializer<BufferType, input_info, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void save(BufferType& buffer, const input_info& input) {
buffer << input.pid;
buffer << input.idx;
}
};
template <typename BufferType>
class Serializer<BufferType, input_info, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
public:
static void load(BufferType& buffer, input_info& input) {
buffer >> input.pid;
buffer >> input.idx;
}
};
} // namespace cldnn
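
This header shows the dispatch pattern used across the new serialization code: the same Serializer class template is partially specialized twice, and std::enable_if selects the save-side specialization for buffers derived from OutputBuffer and the load-side one for buffers derived from InputBuffer, so buffer << value and buffer >> value resolve at compile time. A self-contained sketch of the same SFINAE split, with toy buffer and payload types standing in for the cldnn ones:

#include <iostream>
#include <type_traits>

// Toy CRTP bases standing in for cldnn's OutputBuffer/InputBuffer.
template <typename T> struct OutputBufferBase {};
template <typename T> struct InputBufferBase {};

struct TextWriter : OutputBufferBase<TextWriter> {
    explicit TextWriter(std::ostream& s) : os(s) {}
    std::ostream& os;
};
struct TextReader : InputBufferBase<TextReader> {
    explicit TextReader(std::istream& s) : is(s) {}
    std::istream& is;
};

// Primary template is only declared; exactly one specialization matches.
template <typename Buffer, typename T, typename Enable = void>
struct Serializer;

struct point { int x; int y; };  // toy payload playing the role of input_info

// Enabled only for writer-side buffers: provides save().
template <typename Buffer>
struct Serializer<Buffer, point,
    typename std::enable_if<std::is_base_of<OutputBufferBase<Buffer>, Buffer>::value>::type> {
    static void save(Buffer& b, const point& p) { b.os << p.x << ' ' << p.y << ' '; }
};

// Enabled only for reader-side buffers: provides load().
template <typename Buffer>
struct Serializer<Buffer, point,
    typename std::enable_if<std::is_base_of<InputBufferBase<Buffer>, Buffer>::value>::type> {
    static void load(Buffer& b, point& p) { b.is >> p.x >> p.y; }
};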

View File

@ -5,6 +5,7 @@
#pragma once
#include "primitive.hpp"
#include <vector>
#include "intel_gpu/graph/serialization/string_serializer.hpp"
namespace cldnn {
@ -74,6 +75,10 @@ struct activation_additional_params {
struct activation : public primitive_base<activation> {
CLDNN_DECLARE_PRIMITIVE(activation)
activation() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief Constructs Relu primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
@ -137,6 +142,18 @@ struct activation : public primitive_base<activation> {
additional_params_input.empty() == rhs_casted.additional_params_input.empty();
}
void save(BinaryOutputBuffer& ob) const override {
ob << make_data(&activation_function, sizeof(activation_func));
ob << make_data(&additional_params, sizeof(activation_additional_params));
ob << additional_params_input;
}
void load(BinaryInputBuffer& ib) override {
ib >> make_data(&activation_function, sizeof(activation_func));
ib >> make_data(&additional_params, sizeof(activation_additional_params));
ib >> additional_params_input;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (additional_params_input.empty())

View File

@ -5,6 +5,9 @@
#pragma once
#include "primitive.hpp"
#include "openvino/op/util/attr_types.hpp"
#include "intel_gpu/graph/serialization/input_info_serializer.hpp"
#include "intel_gpu/graph/serialization/string_serializer.hpp"
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
#include <algorithm>
#include <vector>
@ -19,6 +22,10 @@ namespace cldnn {
struct arg_max_min : public primitive_base<arg_max_min> {
CLDNN_DECLARE_PRIMITIVE(arg_max_min)
arg_max_min() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief Constructs arg_max_min primitive.
/// @param id This primitive id.
/// @param input Input primitive id.
@ -95,5 +102,29 @@ struct arg_max_min : public primitive_base<arg_max_min> {
sort == rhs_casted.sort &&
values_first == rhs_casted.values_first;
}
uint32_t get_output_nums() const { return (input_size() == 3 ? 2 : output_size()); }
bool has_second_output() const { return get_output_nums() == 2; }
bool use_multiple_outputs() const { return input_size() != 3; }
void save(BinaryOutputBuffer& ob) const override {
ob << input;
ob << num_outputs;
ob << make_data(&mode, sizeof(ov::op::TopKMode));
ob << top_k;
ob << axis;
ob << make_data(&sort, sizeof(ov::op::TopKSortType));
ob << values_first;
}
void load(BinaryInputBuffer& ib) override {
ib >> input;
ib >> num_outputs;
ib >> make_data(&mode, sizeof(ov::op::TopKMode));
ib >> top_k;
ib >> axis;
ib >> make_data(&sort, sizeof(ov::op::TopKSortType));
ib >> values_first;
}
};
} // namespace cldnn
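
Two serializer styles meet in this save/load pair: the input vector and its pid strings go through the dedicated string/vector/input_info serializers included at the top of the file, while plain enums such as mode and sort are wrapped with make_data, which pairs a raw pointer with a byte count so the binary buffer can copy the value verbatim. A minimal sketch of such a pointer-plus-size wrapper; the real cldnn helper may differ in detail:

#include <cstddef>
#include <ostream>

// Sketch: expose a trivially-copyable value as (pointer, size) so a binary
// buffer can stream its bytes without knowing the concrete type.
struct data_view {
    const void* ptr;
    std::size_t size;
};

inline data_view make_data(const void* p, std::size_t n) { return {p, n}; }

struct byte_writer {
    std::ostream& os;
    byte_writer& operator<<(const data_view& d) {
        os.write(static_cast<const char*>(d.ptr), static_cast<std::streamsize>(d.size));
        return *this;
    }
};

// Usage: enum class topk_mode { max, min };
//        topk_mode m = topk_mode::max;
//        writer << make_data(&m, sizeof(topk_mode));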

View File

@ -7,6 +7,7 @@
#include "activation.hpp"
#include <vector>
#include <algorithm>
#include "intel_gpu/graph/serialization/string_serializer.hpp"
namespace cldnn {
@ -189,6 +190,11 @@ protected:
struct lstm_gemm : public primitive_base<lstm_gemm> {
CLDNN_DECLARE_PRIMITIVE(lstm_gemm)
lstm_gemm() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief Constructs lstm layer.
/// @param id This primitive id.
/// @param input input primitive id.
@ -242,6 +248,22 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
hidden.empty() == rhs_casted.hidden.empty();
}
void save(BinaryOutputBuffer& ob) const override {
ob << weights;
ob << recurrent;
ob << bias;
ob << hidden;
ob << direction;
}
void load(BinaryInputBuffer& ib) override {
ib >> weights;
ib >> recurrent;
ib >> bias;
ib >> hidden;
ib >> direction;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;
@ -257,6 +279,11 @@ protected:
struct lstm_elt : public primitive_base<lstm_elt> {
CLDNN_DECLARE_PRIMITIVE(lstm_elt)
lstm_elt() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
using vec_activation = std::vector<activation_func>;
using vec_activation_param = std::vector<activation_additional_params>;
@ -342,6 +369,22 @@ struct lstm_elt : public primitive_base<lstm_elt> {
#undef cmp_fields
}
void save(BinaryOutputBuffer& ob) const override {
ob << cell;
ob << clip;
ob << input_forget;
ob << make_data(&offset_order, sizeof(lstm_weights_order));
ob << direction;
}
void load(BinaryInputBuffer& ib) override {
ib >> cell;
ib >> clip;
ib >> input_forget;
ib >> make_data(&offset_order, sizeof(lstm_weights_order));
ib >> direction;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
std::vector<std::reference_wrapper<const primitive_id>> ret;

View File

@ -6,6 +6,8 @@
#include "primitive.hpp"
#include "intel_gpu/runtime/memory.hpp"
#include <vector>
#include "intel_gpu/graph/serialization/string_serializer.hpp"
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
namespace cldnn {
@ -24,6 +26,10 @@ enum class reorder_mean_mode {
struct reorder : public primitive_base<reorder> {
CLDNN_DECLARE_PRIMITIVE(reorder)
reorder() : primitive_base("", {}), output_format(format::any) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
/// @brief reorder memory types
enum class memory_type {
buffer,
@ -178,6 +184,24 @@ struct reorder : public primitive_base<reorder> {
mean.empty() == rhs_casted.mean.empty();
}
void save(BinaryOutputBuffer& ob) const override {
ob << make_data(&output_format, sizeof(format));
ob << mean;
ob << subtract_per_feature;
ob << make_data(&mean_mode, sizeof(reorder_mean_mode));
ob << make_data(&input_mem_type, sizeof(memory_type));
ob << truncate;
}
void load(BinaryInputBuffer& ib) override {
ib >> make_data(&output_format, sizeof(format));
ib >> mean;
ib >> subtract_per_feature;
ib >> make_data(&mean_mode, sizeof(reorder_mean_mode));
ib >> make_data(&input_mem_type, sizeof(memory_type));
ib >> truncate;
}
protected:
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
if (mean.empty())

View File

@ -13,6 +13,10 @@ namespace cldnn {
struct roi_pooling : public primitive_base<roi_pooling> {
CLDNN_DECLARE_PRIMITIVE(roi_pooling)
roi_pooling() : primitive_base("", {}) {}
DECLARE_OBJECT_TYPE_SERIALIZATION
roi_pooling(const primitive_id& id,
const input_info& input_data,
const input_info& input_rois,
@ -118,6 +122,36 @@ struct roi_pooling : public primitive_base<roi_pooling> {
cmp_fields(spatial_bins_y);
#undef cmp_fields
}
void save(BinaryOutputBuffer& ob) const override {
ob << make_data(&mode, sizeof(pooling_mode));
ob << position_sensitive;
ob << pooled_width;
ob << pooled_height;
ob << spatial_scale;
ob << trans_std;
ob << no_trans;
ob << output_dim;
ob << part_size;
ob << group_size;
ob << spatial_bins_x;
ob << spatial_bins_y;
}
void load(BinaryInputBuffer& ib) override {
ib >> make_data(&mode, sizeof(pooling_mode));
ib >> position_sensitive;
ib >> pooled_width;
ib >> pooled_height;
ib >> spatial_scale;
ib >> trans_std;
ib >> no_trans;
ib >> output_dim;
ib >> part_size;
ib >> group_size;
ib >> spatial_bins_x;
ib >> spatial_bins_y;
}
};
} // namespace cldnn
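
Every save/load pair added in this commit streams its fields in the same order on both sides, and that symmetry is exactly what the new serialization unit tests exercise: write a primitive out, read it back, compare. A minimal round-trip sketch of that test shape, using a toy type rather than the actual fixtures:

#include <cassert>
#include <sstream>

// Toy struct mirroring the save/load discipline above: load must read
// fields in the exact order save wrote them.
struct pool_params {
    int pooled_width = 0;
    float spatial_scale = 0.f;
    void save(std::ostream& os) const { os << pooled_width << ' ' << spatial_scale; }
    void load(std::istream& is) { is >> pooled_width >> spatial_scale; }
};

int main() {
    pool_params a{7, 0.5f}, b;
    std::stringstream buf;
    a.save(buf);
    b.load(buf);
    assert(b.pooled_width == a.pooled_width && b.spatial_scale == a.spatial_scale);
    return 0;
}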

View File

@ -114,58 +114,6 @@ struct kernel_arguments_data {
const scalars_desc* scalars = nullptr;
};
struct kernel_arguments_data_idx {
std::vector<int32_t> inputs;
int32_t weights;
int32_t recurrent;
int32_t hidden;
int32_t cell;
int32_t bias;
int32_t weights_zero_points;
int32_t activations_zero_points;
int32_t compensation;
int32_t lookup_table;
int32_t scale_table;
int32_t slope;
std::vector<int32_t> fused_op_inputs;
scalars_desc scalars;
template <typename BufferType>
void save(BufferType& ob) const {
ob << inputs;
ob << weights;
ob << recurrent;
ob << hidden;
ob << cell;
ob << bias;
ob << weights_zero_points;
ob << activations_zero_points;
ob << compensation;
ob << lookup_table;
ob << scale_table;
ob << slope;
ob << fused_op_inputs;
}
template <typename BufferType>
void load(BufferType& ib) {
ib >> inputs;
ib >> weights;
ib >> recurrent;
ib >> hidden;
ib >> cell;
ib >> bias;
ib >> weights_zero_points;
ib >> activations_zero_points;
ib >> compensation;
ib >> lookup_table;
ib >> scale_table;
ib >> slope;
ib >> fused_op_inputs;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// KernelString
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -31,7 +31,6 @@ public:
void init_kernels(const kernels_cache&) override {}
void set_arguments(primitive_inst& /*instance*/) override {}
void set_arguments(kernel_arguments_data_idx& /*instance*/) override {}
kernel_arguments_data get_arguments(const primitive_inst& /*instance*/) const override {
kernel_arguments_data args;
return args;

View File

@ -163,3 +163,4 @@ attach_activation_impl::attach_activation_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::activation_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::activation)
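
Each type that can round-trip through the model cache gets a BIND_BINARY_BUFFER_WITH_TYPE registration like the pair above; the same lines are added for arg_max_min, lstm_elt, lstm_gemm, reorder, and roi_pooling below. A macro of this kind typically expands to a static registrar that maps a type tag to a factory, so the loader can reconstruct the right concrete object from a tag stored in the blob. A hedged sketch of that general idea, not the actual cldnn macro:

#include <functional>
#include <map>
#include <memory>
#include <string>

// Sketch: a tag-to-factory registry in the spirit of a binding macro.
struct saveable {
    virtual ~saveable() = default;
};

using factory = std::function<std::unique_ptr<saveable>()>;

inline std::map<std::string, factory>& type_registry() {
    static std::map<std::string, factory> r;
    return r;
}

// One static instance per registered type wires its factory in at startup.
template <typename T>
struct registrar {
    explicit registrar(const std::string& name) {
        type_registry()[name] = [] { return std::unique_ptr<saveable>(new T()); };
    }
};

#define BIND_TYPE(T) static registrar<T> bind_##T(#T);

struct my_impl : saveable {};
BIND_TYPE(my_impl)  // the loader can now look up "my_impl" and construct one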

View File

@ -51,7 +51,7 @@ protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<arg_max_min>& instance) const override {
kernel_arguments_data args = parent::get_arguments(instance);
if (instance.node->has_second_output()) {
if (instance.get_typed_desc<arg_max_min>()->has_second_output()) {
if (args.inputs.size() > 1) {
args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
}
@ -138,3 +138,4 @@ attach_arg_max_min_impl::attach_arg_max_min_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::arg_max_min_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::arg_max_min)

View File

@ -102,3 +102,4 @@ attach_lstm_elt_impl::attach_lstm_elt_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_elt_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::lstm_elt)

View File

@ -103,3 +103,4 @@ attach_lstm_gemm_impl::attach_lstm_gemm_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_gemm_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::lstm_gemm)

View File

@ -31,19 +31,19 @@ protected:
args.inputs.push_back(instance.input_memory_ptr(i));
}
if (instance.has_num_select_per_class() && !instance.node->num_select_per_class_node().is_constant()) {
if (instance.has_num_select_per_class() && !instance.num_select_per_class_inst()->is_constant()) {
args.inputs.push_back(instance.num_select_per_class_mem());
}
if (instance.has_iou_threshold() && !instance.node->iou_threshold_node().is_constant()) {
if (instance.has_iou_threshold() && !instance.iou_threshold_inst()->is_constant()) {
args.inputs.push_back(instance.iou_threshold_mem());
}
if (instance.has_score_threshold() && !instance.node->score_threshold_node().is_constant()) {
if (instance.has_score_threshold() && !instance.score_threshold_inst()->is_constant()) {
args.inputs.push_back(instance.score_threshold_mem());
}
if (instance.has_soft_nms_sigma() && !instance.node->soft_nms_sigma_node().is_constant()) {
if (instance.has_soft_nms_sigma() && !instance.soft_nms_sigma_inst()->is_constant()) {
args.inputs.push_back(instance.soft_nms_sigma_mem());
}

View File

@ -33,7 +33,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
kernel_selector::kernel_data _kernel_data;
std::vector<kernel_id> _kernel_ids;
std::vector<kernel::ptr> _kernels;
kernel_arguments_data_idx _kernel_args;
typed_primitive_impl_ocl() : _kernel_data({}), _kernel_ids({}), _kernels({}) {
_kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE;
@ -75,7 +74,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
ob << _kernel_data.internalBufferSizes;
ob << _kernel_data.kernels;
ob << _kernel_ids;
ob << _kernel_args;
}
void load(BinaryInputBuffer& ib) override {
@ -83,7 +81,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
ib >> _kernel_data.internalBufferSizes;
ib >> _kernel_data.kernels;
ib >> _kernel_ids;
ib >> _kernel_args;
}
template<typename ImplType>
@ -126,38 +123,6 @@ protected:
return args;
}
kernel_arguments_data get_arguments_by_idx(const typed_primitive_inst<PType>& instance) const {
kernel_arguments_data args;
for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) {
args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i]));
}
args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights;
args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent;
args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden;
args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell;
args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias;
args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ?
instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points;
args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ?
instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points;
args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation;
args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table;
args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table;
args.slope = (_kernel_args.slope >= 0) ? instance.dep_memory_ptr(_kernel_args.slope) : args.slope;
for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) {
args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i]));
}
for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
args.outputs.push_back(instance.output_memory_ptr(i));
}
return args;
}
event::ptr aggregate_events(const std::vector<event::ptr>& events, stream& stream, bool group = false, bool is_output = false) const {
if (events.size() == 1 && !is_output)
return events[0];
@ -211,31 +176,21 @@ protected:
stream& stream = instance.get_network().get_stream();
size_t k_idx = 0;
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
kernel_arguments_data args;
if (_kernel_data.kernels[kd_idx].skip_execution) {
continue;
}
if (_kernel_args.inputs.size() > 0) {
args = get_arguments_by_idx(instance);
} else {
args = get_arguments(instance);
}
auto args = get_arguments(instance);
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
for (const auto& m : instance.get_intermediates_memories()) {
args.intermediates.push_back(m);
}
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
stream.set_arguments(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args);
}
}
void set_arguments_impl(kernel_arguments_data_idx& args_idx) override {
this->_kernel_args = args_idx;
}
kernel_arguments_data get_arguments_impl(const typed_primitive_inst<PType>& instance) const override {
for (size_t k = 0; k < _kernels.size(); ++k) {
auto args = get_arguments(instance);
@ -274,20 +229,13 @@ protected:
is_output_event = instance.is_output_event();
}
kernel_arguments_data args;
if (_kernel_args.inputs.size() > 0) {
args = get_arguments_by_idx(instance);
} else {
args = get_arguments(instance);
for (const auto& m : instance.get_intermediates_memories()) {
args.intermediates.push_back(m);
}
}
auto args = get_arguments(instance);
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
for (const auto& m : instance.get_intermediates_memories()) {
args.intermediates.push_back(m);
}
auto ev = stream.enqueue_kernel(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args, tmp_events, is_output_event);
new_events.push_back(ev);
all_events.push_back(ev);

View File

@ -34,8 +34,8 @@ protected:
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
args.inputs.push_back(instance.input_memory_ptr(i));
}
if (instance.node->get_scale_shift_opt()) {
if (instance.node->get_dependencies().size() == 9) {
if (instance.scale_shift_opt) {
if (instance.dependencies().size() == 9) {
args.inputs.push_back(instance.dep_memory_ptr(5));
args.inputs.push_back(instance.dep_memory_ptr(6));
args.inputs.push_back(instance.dep_memory_ptr(7));

View File

@ -140,3 +140,4 @@ attach_reorder_impl::attach_reorder_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reorder_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::reorder)

View File

@ -49,7 +49,7 @@ protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<roi_pooling>& instance) const override {
kernel_arguments_data args;
if (instance.argument->mode == pooling_mode::deformable_bilinear && !instance.argument->no_trans)
if (instance.get_typed_desc<roi_pooling>()->mode == pooling_mode::deformable_bilinear && !instance.get_typed_desc<roi_pooling>()->no_trans)
args.inputs = {
instance.input_memory_ptr(),
instance.rois_memory(),
@ -109,3 +109,4 @@ attach_roi_pooling_impl::attach_roi_pooling_impl() {
} // namespace cldnn
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::roi_pooling_impl)
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::roi_pooling)

View File

@ -58,7 +58,7 @@ public:
memory::ptr slope_memory() const { return dep_memory_ptr(1); }
bool is_parameterized() const { return !argument->additional_params_input.empty(); }
bool is_parameterized() const { return !get_typed_desc<activation>()->additional_params_input.empty(); }
};
using activation_inst = typed_primitive_inst<activation>;
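
This one-line change is a motif that repeats through the instance headers below (lstm_elt, lstm_gemm, multiclass_nms, non_max_suppression, reorder): accessors that used to reach through argument-> or instance.node-> now go through get_typed_desc<PType>() or dependencies(). Once a network is imported from the cache there is no program_node behind the instance, so anything an impl needs at execution time has to come from the deserialized primitive descriptor or the instance's own dependency list; the arg_max_min helpers below likewise move from the program node onto the primitive itself.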

View File

@ -19,12 +19,6 @@ public:
typed_program_node(std::shared_ptr<primitive> prim, program& prog) : parent(prim, prog) {}
program_node& input() const { return get_dependency(0); }
uint32_t get_output_nums() const {
return (get_primitive()->input_size() == 3 ? 2 : get_primitive()->output_size());
}
bool has_second_output() const { return get_output_nums() == 2; }
bool use_multiple_outputs() const { return get_primitive()->input_size() != 3; }
std::vector<size_t> get_shape_infer_dependencies() const override { return {1}; }
};

View File

@ -45,16 +45,16 @@ public:
typed_primitive_inst(network& network, lstm_elt_node const& node);
memory::ptr cell_memory() const { return dep_memory_ptr(1); }
bool cell_term() const { return !argument->cell.empty(); }
lstm_weights_order offset_order() const { return argument->offset_order; }
bool cell_term() const { return !get_typed_desc<lstm_elt>()->cell.empty(); }
lstm_weights_order offset_order() const { return get_typed_desc<lstm_elt>()->offset_order; }
float clip() const {
float clip_val = argument->clip;
float clip_val = get_typed_desc<lstm_elt>()->clip;
if (clip_val < 0)
throw std::range_error("Clip value < 0");
return clip_val;
}
bool input_forget() const { return argument->input_forget; }
uint32_t direction() const { return argument->direction; }
bool input_forget() const { return get_typed_desc<lstm_elt>()->input_forget; }
uint32_t direction() const { return get_typed_desc<lstm_elt>()->direction; }
};
using lstm_elt_inst = typed_primitive_inst<lstm_elt>;

View File

@ -44,9 +44,9 @@ public:
memory::ptr recurrent_memory() const { return dep_memory_ptr(2); }
memory::ptr bias_memory() const { return dep_memory_ptr(3); }
memory::ptr hidden_memory() const { return bias_term() ? dep_memory_ptr(4) : dep_memory_ptr(3); }
bool bias_term() const { return !argument->bias.empty(); }
bool hidden_term() const { return !argument->hidden.empty(); }
uint32_t direction() const { return argument->direction; }
bool bias_term() const { return !get_typed_desc<lstm_gemm>()->bias.empty(); }
bool hidden_term() const { return !get_typed_desc<lstm_gemm>()->hidden.empty(); }
uint32_t direction() const { return get_typed_desc<lstm_gemm>()->direction; }
};
using lstm_gemm_inst = typed_primitive_inst<lstm_gemm>;

View File

@ -64,10 +64,10 @@ public:
typed_primitive_inst(network& network, const multiclass_nms_node& node) : parent(network, node) {}
memory::ptr output_indices_memory() const {
return dep_memory_ptr(node->get_dependencies().size() - 2);
return dep_memory_ptr(dependencies().size() - 2);
}
memory::ptr output_num_memory() const {
return dep_memory_ptr(node->get_dependencies().size() - 1);
return dep_memory_ptr(dependencies().size() - 1);
}
};

View File

@ -91,6 +91,27 @@ class typed_primitive_inst<non_max_suppression> : public typed_primitive_inst_ba
using parent = typed_primitive_inst_base<non_max_suppression>;
using parent::parent;
size_t get_iou_threshold_offset() const {
size_t offset = 2;
offset += has_num_select_per_class();
return offset;
}
size_t get_score_threshold_offset() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
return offset;
}
size_t get_soft_nms_sigma_offset() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
offset += has_score_threshold();
return offset;
}
public:
typed_primitive_inst(network& network, non_max_suppression_node const& node)
: parent(network, node)
@ -113,29 +134,32 @@ public:
memory::ptr num_select_per_class_mem() const {
return dep_memory_ptr(2);
}
std::shared_ptr<const primitive_inst> num_select_per_class_inst() const {
return dependencies().at(2).first;
}
bool has_iou_threshold() const { return !get_typed_desc<non_max_suppression>()->iou_threshold.empty(); }
memory::ptr iou_threshold_mem() const {
size_t offset = 2;
offset += has_num_select_per_class();
return dep_memory_ptr(offset);
return dep_memory_ptr(get_iou_threshold_offset());
}
std::shared_ptr<const primitive_inst> iou_threshold_inst() const {
return dependencies().at(get_iou_threshold_offset()).first;
}
bool has_score_threshold() const { return !get_typed_desc<non_max_suppression>()->score_threshold.empty(); }
memory::ptr score_threshold_mem() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
return dep_memory_ptr(offset);
return dep_memory_ptr(get_score_threshold_offset());
}
std::shared_ptr<const primitive_inst> score_threshold_inst() const {
return dependencies().at(get_score_threshold_offset()).first;
}
bool has_soft_nms_sigma() const { return !get_typed_desc<non_max_suppression>()->soft_nms_sigma.empty(); }
memory::ptr soft_nms_sigma_mem() const {
size_t offset = 2;
offset += has_num_select_per_class();
offset += has_iou_threshold();
offset += has_score_threshold();
return dep_memory_ptr(offset);
return dep_memory_ptr(get_soft_nms_sigma_offset());
}
std::shared_ptr<const primitive_inst> soft_nms_sigma_inst() const {
return dependencies().at(get_soft_nms_sigma_offset()).first;
}
bool has_second_output() const { return !get_typed_desc<non_max_suppression>()->second_output.empty(); }
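
The refactor above folds three copies of the same offset arithmetic into the private get_*_offset() helpers, shared by the memory accessors and the new *_inst() dependency accessors. The arithmetic works because the optional inputs are appended after the two mandatory ones in a fixed order, and each has_*() bool contributes 0 or 1 to the offsets behind it; a small standalone sketch of that accumulation (toy struct, not the cldnn class):

#include <cassert>
#include <cstddef>

// Sketch: optional inputs sit after the two mandatory ones in a fixed
// order; every present flag shifts the offsets of the inputs behind it.
struct nms_layout {
    bool has_num_select_per_class;
    bool has_iou_threshold;
    bool has_score_threshold;

    std::size_t iou_threshold_offset() const {
        return 2 + static_cast<std::size_t>(has_num_select_per_class);
    }
    std::size_t score_threshold_offset() const {
        return iou_threshold_offset() + static_cast<std::size_t>(has_iou_threshold);
    }
    std::size_t soft_nms_sigma_offset() const {
        return score_threshold_offset() + static_cast<std::size_t>(has_score_threshold);
    }
};

int main() {
    nms_layout l{true, true, false};
    assert(l.iou_threshold_offset() == 3);
    assert(l.score_threshold_offset() == 4);
    assert(l.soft_nms_sigma_offset() == 4);  // score_threshold input absent
    return 0;
}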

View File

@ -49,7 +49,6 @@ struct primitive_impl {
virtual void set_node_params(const program_node&) {}
virtual std::string get_type() const = 0;
virtual void set_arguments(primitive_inst& instance) = 0;
virtual void set_arguments(kernel_arguments_data_idx& args_idx) = 0;
virtual kernel_arguments_data get_arguments(const primitive_inst& instance) const = 0;
virtual event::ptr execute(const std::vector<event::ptr>& events, primitive_inst& instance) = 0;
std::string get_kernel_name() const { return _kernel_name; }
@ -288,7 +287,6 @@ protected:
memory::ptr allocate_internal_buffer(size_t idx);
static std::vector<std::shared_ptr<primitive_inst>> build_exec_deps(
std::vector<std::pair<std::shared_ptr<primitive_inst>, int32_t>> const& mem_deps);
void convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const;
int32_t get_index_in_deps(memory::cptr arg) const;
// event function called by primitive_inst::execute after checking if primitive should rerun and before calling
@ -382,16 +380,11 @@ private:
return set_arguments_impl(reinterpret_cast<typed_primitive_inst<PType>&>(instance));
}
void set_arguments(kernel_arguments_data_idx& args_idx) override {
return set_arguments_impl(args_idx);
}
kernel_arguments_data get_arguments(const primitive_inst& instance) const override {
return get_arguments_impl(reinterpret_cast<const typed_primitive_inst<PType>&>(instance));
}
virtual void set_arguments_impl(typed_primitive_inst<PType>& /*instance*/) {}
virtual void set_arguments_impl(kernel_arguments_data_idx& /*args_idx*/) {}
virtual kernel_arguments_data get_arguments_impl(const typed_primitive_inst<PType>& /*instance*/) const {
kernel_arguments_data args;
return args;

View File

@ -167,8 +167,12 @@ public:
static std::vector<layout> calc_output_layouts(quantize_node const& node, kernel_impl_params const& impl_param);
static layout calc_output_layout(quantize_node const& node, kernel_impl_params const& impl_param);
static std::string to_string(quantize_node const& node);
void save(BinaryOutputBuffer& ob) const override;
void load(BinaryInputBuffer& ib) override;
typed_primitive_inst(network& network, quantize_node const& desc);
bool scale_shift_opt; // This is for serialization. Please do not remove it.
};
using quantize_inst = typed_primitive_inst<quantize>;

View File

@ -65,7 +65,7 @@ public:
memory::ptr mean_nv12_memory() const { return dep_memory_ptr(2); }
memory::ptr mean_memory() const { return dep_memory_ptr(1); }
bool has_mean() const { return !argument->mean.empty(); }
bool has_mean() const { return !get_typed_desc<reorder>()->mean.empty(); }
void update_output_memory() override;
bool requires_reinterpret() const { return _req_reinterpr; }

View File

@ -409,7 +409,8 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
for (const auto& p_inst : _exec_order) {
ib >> *p_inst;
_primitives[p_inst->id()] = p_inst;
p_inst->init_kernels(kernels_cache);
if (p_inst->get_impl() != nullptr)
p_inst->init_kernels(kernels_cache);
}
for (auto& item : _primitives) {

View File

@ -1142,44 +1142,12 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
if (_impl != nullptr) {
ob << true;
kernel_arguments_data args = _impl->get_arguments(*this);
kernel_arguments_data_idx args_idx;
convert_args(args, args_idx);
_impl->set_arguments(args_idx);
ob << _impl;
} else {
ob << false;
}
}
void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const {
if (args.inputs.size() > 0) {
args_idx.inputs.resize(args.inputs.size());
for (uint32_t idx = 0; idx < args.inputs.size(); ++idx) {
args_idx.inputs[idx] = get_index_in_deps(args.inputs[idx]);
}
}
args_idx.weights = (args.weights == nullptr) ? -1 : get_index_in_deps(args.weights);
args_idx.recurrent = (args.recurrent == nullptr) ? -1 : get_index_in_deps(args.recurrent);
args_idx.hidden = (args.hidden == nullptr) ? -1 : get_index_in_deps(args.hidden);
args_idx.cell = (args.cell == nullptr) ? -1 : get_index_in_deps(args.cell);
args_idx.bias = (args.bias == nullptr) ? -1 : get_index_in_deps(args.bias);
args_idx.weights_zero_points = (args.weights_zero_points == nullptr) ? -1 : get_index_in_deps(args.weights_zero_points);
args_idx.activations_zero_points = (args.activations_zero_points == nullptr) ? -1 : get_index_in_deps(args.activations_zero_points);
args_idx.compensation = (args.compensation == nullptr) ? -1 : get_index_in_deps(args.compensation);
args_idx.lookup_table = (args.lookup_table == nullptr) ? -1 : get_index_in_deps(args.lookup_table);
args_idx.scale_table = (args.scale_table == nullptr) ? -1 : get_index_in_deps(args.scale_table);
args_idx.slope = (args.slope == nullptr) ? -1 : get_index_in_deps(args.slope);
if (args.fused_op_inputs.size() > 0) {
args_idx.fused_op_inputs.resize(args.fused_op_inputs.size());
for (uint32_t idx = 0; idx < args.fused_op_inputs.size(); ++idx) {
args_idx.fused_op_inputs[idx] = get_index_in_deps(args.fused_op_inputs[idx]);
}
}
}
int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
for (uint32_t idx = 0; idx < _deps.size(); ++idx) {
if (arg == dep_memory_ptr(idx))

View File

@ -74,6 +74,17 @@ std::string quantize_inst::to_string(quantize_node const& node) {
return primitive_description.str();
}
quantize_inst::typed_primitive_inst(network& network, quantize_node const& node) : parent(network, node) {}
quantize_inst::typed_primitive_inst(network& network, quantize_node const& node) : parent(network, node) {
scale_shift_opt = node.get_scale_shift_opt();
}
void quantize_inst::save(cldnn::BinaryOutputBuffer& ob) const {
parent::save(ob);
ob << scale_shift_opt;
}
void quantize_inst::load(BinaryInputBuffer& ib) {
parent::load(ib);
ib >> scale_shift_opt;
}
} // namespace cldnn
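
This is the implementation side of the new scale_shift_opt member declared in the quantize_inst header above: the flag is copied out of the program node at construction time, and save/load chain to the parent before streaming the extra field, because the node is gone by the time a cached network is imported. A minimal sketch of that chain-to-parent discipline:

#include <istream>
#include <ostream>

// Sketch: derived serialization calls the parent first, then appends its
// own fields; load reads them back in exactly the same order.
struct base_inst {
    int id = 0;
    virtual ~base_inst() = default;
    virtual void save(std::ostream& os) const { os << id << ' '; }
    virtual void load(std::istream& is) { is >> id; }
};

struct quantize_like_inst : base_inst {
    bool scale_shift_opt = false;  // captured from the build-time node
    void save(std::ostream& os) const override {
        base_inst::save(os);       // parent fields first
        os << scale_shift_opt << ' ';
    }
    void load(std::istream& is) override {
        base_inst::load(is);
        is >> scale_shift_opt;
    }
};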

View File

@ -229,9 +229,10 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
setOutputs(new_results);
}
auto graph_base = std::make_shared<Graph>(ib, context_impl, m_config, 0);
auto pos = ib.tellg();
for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
ib.seekg(pos);
auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n);
m_graphs.push_back(graph);
}
}
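
This is where the tellg/seekg pass-throughs added to BinaryInputBuffer in the first hunk pay off: instead of deserializing one graph and copy-constructing the remaining streams from it, the import records the blob position once and rewinds before building each stream's Graph, so every stream deserializes from the same bytes. It is also why the multiple-graphs early-out disappears from is_serializable just below. The same idiom on a bare std::istream, with a hypothetical read_graph standing in for the Graph constructor:

#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for deserializing one Graph from the buffer.
std::string read_graph(std::istream& is) {
    std::string blob;
    is >> blob;
    return blob;
}

int main() {
    std::istringstream ib("serialized_graph_bytes");
    std::vector<std::string> graphs;
    const auto pos = ib.tellg();       // remember where the graph blob starts
    for (int n = 0; n < 2; ++n) {
        ib.seekg(pos);                 // rewind so each stream re-reads it
        graphs.push_back(read_graph(ib));
    }
    return 0;
}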
@ -317,10 +318,6 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
}
bool CompiledModel::is_serializable() {
// Model with multiple graphs is not yet supported.
if (m_graphs.size() != 1)
return false;
// Dynamic model serialization is not yet supported.
if (m_graphs[0]->GetNetwork()->is_dynamic())
return false;

View File

@ -84,7 +84,11 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
ib >> primitiveIDs;
ib >> outputDims;
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
size_t num_networks;
ib >> num_networks;
for (size_t i = 0; i < num_networks; ++i) {
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
}
}
Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
@ -500,9 +504,10 @@ void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
ob << primitiveIDs;
ob << outputDims;
auto m_network = m_networks.back();
m_network->save(ob);
ob << m_networks.size();
for (auto net : m_networks) {
net->save(ob);
}
}
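
Export and the import constructor above now agree on a count-prefixed layout: the writer emits m_networks.size() followed by each network, and the reader pulls num_networks back and loops instead of assuming a single network. The generic shape of that writer/reader contract, sketched with plain iostreams:

#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

// Sketch: size-prefixed sequence serialization. Both sides must agree
// that the element count comes first.
void save_all(std::ostream& os, const std::vector<std::string>& items) {
    os << static_cast<std::uint64_t>(items.size()) << ' ';
    for (const auto& item : items) os << item << ' ';
}

std::vector<std::string> load_all(std::istream& is) {
    std::uint64_t n = 0;
    is >> n;
    std::vector<std::string> items(n);
    for (auto& item : items) is >> item;
    return items;
}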
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfo() {

View File

@ -1695,24 +1695,7 @@ struct activation_random_test : testing::TestWithParam<activation_random_test_pa
ExecutionConfig config{ov::intel_gpu::custom_outputs(std::vector<std::string>{"activation"})};
std::shared_ptr<cldnn::network> net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
net->set_input_data("in", in_mem);
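
All of the test churn from here down is one refactor applied file by file: the hand-rolled save-to-membuf-and-reload block is replaced with a shared get_network helper. Reconstructed from the removed blocks, the helper plausibly looks like the following; the exact signature and the config handling are assumptions, since the helper itself is not shown in this diff:

// Hedged reconstruction of the shared test helper, inferred from the
// removed blocks above; the real utility may differ in detail.
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                cldnn::stream::ptr stream,
                                const bool is_caching_test) {
    cldnn::network::ptr network;
    if (is_caching_test) {
        membuf mem_buf;
        {
            cldnn::network _network(engine, topology, config);
            std::ostream out_mem(&mem_buf);
            BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
            _network.save(ob);                      // serialize the built network
        }
        {
            std::istream in_mem(&mem_buf);
            BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
            network = std::make_shared<cldnn::network>(ib, config, stream, engine);
        }
    } else {
        network = std::make_shared<cldnn::network>(engine, topology, config);
    }
    return network;
}

Every subsequent hunk in this commit makes the same substitution, so the remaining files below all follow this pattern.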

View File

@ -135,24 +135,7 @@ public:
topology.add(adaptive_pooling("adaptive_avg_pooling_blocked", input_info("input_reordered"), params.outputTensor));
topology.add(reorder("adaptive_avg_pooling", input_info("adaptive_avg_pooling_blocked"), plain_layout, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);

View File

@ -162,25 +162,7 @@ public:
result_id = reorder_result_id;
}
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_data_id, input_mem);

View File

@ -123,24 +123,7 @@ void test_add_reorders_gpu_basic_reshape_and_tile(bool is_caching_test) {
set_values(input, input_vec);
tile_ref<T>(input, output_ref, 2, 4);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);

View File

@ -701,24 +701,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test)
set_values(input, input_vec);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
@ -868,24 +851,7 @@ void test_top_k_layer_md_sync(bool is_caching_test) {
true));
topology.add(mutable_data("arg_max.1", { input_info("arg_max.0") }, shared_memory));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input1", input1);
auto outputs = network->execute();

View File

@ -677,24 +677,7 @@ void test_batch_to_space_fp32_gpu_i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16(bool
tensor(format::bfyx, {1,8,3,1}, 1)));
topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f32));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input", input);

View File

@ -230,24 +230,7 @@ TEST_P(binary_convolution_test, conv) {
topology_bin.add(binary_convolution(output_name, input_info(input_name), {output_name + weights_suffix},
stride, pad, dilation, os_size, 1, p.pad_value, p.dt));
cldnn::network::ptr network_bin;
if (p.is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology_bin, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network_bin = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network_bin = std::make_shared<cldnn::network>(engine, topology_bin, config);
}
cldnn::network::ptr network_bin = get_network(engine, topology_bin, config, get_test_stream_ptr(), p.is_caching_test);
network_bin->set_input_data(input_name, input);

View File

@ -85,25 +85,7 @@ public:
pad_mode,
pad_value),
reorder("output", input_info("border"), cldnn::format::bfyx, T_dt));
std::shared_ptr<cldnn::network> target_network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, target_topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
target_network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
target_network = std::make_shared<cldnn::network>(engine, target_topology);
}
cldnn::network::ptr target_network = get_network(engine, target_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
target_network->set_input_data("input", input);
auto target_output = target_network->execute().at("output").get_memory();
cldnn::mem_lock<T> target_output_ptr(target_output, get_test_stream());

View File

@ -212,24 +212,7 @@ void start_broadcast_test_5d(format cldnn_format, data_types cldnn_data_type, st
set_values(input, input_data);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();

View File

@ -59,24 +59,7 @@ struct bucketize_test : testing::TestWithParam<bucketize_test_params<I, B, O>> {
topology.add(
reorder("plane_bucketize_left_bound", input_info("bucketize_left_bound"), format::bfyx, type_to_data_type<O>::value));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("buckets", buckets);

View File

@ -127,24 +127,7 @@ void start_cl_mem_check_2_inputs(bool is_caching_test) {
topology.add(input2);
topology.add(reorder("reorder", input_info("input"), input_info("input2"), output_layout));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(*engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, *engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), *engine);
}
} else {
network = std::make_shared<cldnn::network>(*engine, topology);
}
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_memory);
network->set_input_data("input2", input_memory2);

View File

@ -36,24 +36,7 @@ void exexute_network(cldnn::engine& engine, const ExecutionConfig& cfg, bool is_
};
set_values(input, input_vec);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, cfg);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, cfg, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, cfg);
}
cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();

View File

@ -981,24 +981,7 @@ public:
topology.add(pooling("pool_final", input_info("conv"), pooling_mode::max, {1, 1}, {1, 1}));
topology.add(reorder("reorder", input_info("pool_final"), layout(data_type, format::bfyx, {(int32_t)batch_num, (int32_t)output_f, (int32_t)input_y, (int32_t)input_x})));
std::shared_ptr<cldnn::network> concat_network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
concat_network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
concat_network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr concat_network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
for (size_t i = 0; i < in_features.size(); i++) {
concat_network->set_input_data(input_ids[i], in_memory[i]);

View File

@ -593,24 +593,7 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test
topology.add(convert_color("convert_color", { input_info("input"), input_info("input2"), input_info("input3") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB,
cldnn::convert_color::memory_type::image, output_layout));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(*engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, *engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), *engine);
}
} else {
network = std::make_shared<cldnn::network>(*engine, topology);
}
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_memory);
network->set_input_data("input2", input_memory2);

View File

@ -9422,24 +9422,7 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1(
config.set_property(ov::intel_gpu::optimize_data(true));
ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } }));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);

View File

@ -1213,24 +1213,7 @@ TEST_P(crop_gpu, pad_test) {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();

View File

@ -104,24 +104,7 @@ public:
topology.add(ctc_loss("ctc_loss", inputs_ids, p.preprocess_collapse_repeated, p.ctc_merge_repeated, p.unique));
topology.add(reorder("reordered_ctc_loss", input_info("ctc_loss"), plane_format, float_data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
for (auto& input : inputs) {
network->set_input_data(std::get<0>(input), std::get<1>(input));

View File

@ -185,24 +185,7 @@ public:
topology.add(input_layout("Input0", input->get_layout()));
topology.add(cum_sum("cum_sum", input_info("Input0"), axis, exclusive, reverse));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input);

View File

@ -547,24 +547,7 @@ void test_custom_gpu_primitive_u8_add_basic_in2x2x2x2(bool is_caching_test) {
2, 60, 0, 20
});
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("input2", input2);

View File

@ -2360,24 +2360,7 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1
ov::intel_gpu::ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);

View File

@ -992,24 +992,7 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input1", input1);

View File

@ -393,24 +393,7 @@ void test_depth_to_space_fp32_gpu_d1822_bs2_depth_first(bool is_caching_test) {
depth_to_space("depth_to_space", input_info("Input0"), block_size, depth_to_space_mode::depth_first)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input1);

View File

@ -147,25 +147,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -200,25 +182,7 @@ public:
topology.add(detection_output("detection_output_1", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
topology.add(detection_output("detection_output_2", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -260,25 +224,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -326,25 +272,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -386,25 +314,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -457,25 +367,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -538,25 +430,7 @@ public:
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id
));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -606,25 +480,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -685,25 +541,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -751,25 +589,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -820,25 +640,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -884,25 +686,7 @@ public:
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@ -965,25 +749,7 @@ public:
prior_is_normalized, this->img_size, this->img_size
));
ExecutionConfig config;
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology, config);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);

View File

@ -118,24 +118,7 @@ public:
// It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks
topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
const auto outputs = network->execute();

View File

@ -4155,24 +4155,7 @@ struct eltwise_random_test : testing::TestWithParam<eltwise_random_test_params>
ExecutionConfig config_opt;
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"eltwise_opt"}));
std::shared_ptr<cldnn::network> net_opt;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo_opt, config_opt);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
net_opt = std::make_shared<cldnn::network>(ib, config_opt, get_test_stream_ptr(), engine);
}
} else {
net_opt = std::make_shared<cldnn::network>(engine, topo_opt, config_opt);
}
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
net_opt->set_input_data("input1", input1);
net_opt->set_input_data("input2", input2);

View File

@ -1394,24 +1394,7 @@ void test_embedding_bag_fp32_gpu_extended5_6(bool is_caching_test) {
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", emb_table);
network->set_input_data("Input1", indices);

View File

@ -143,24 +143,7 @@ public:
const primitive_id eddo_id = "experimental_detectron_detection_output";
topology.add(reorder(eddo_id, input_info(b_eddo_primitive) /*b_eddo_id*/, format::bfyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_boxes_id, input_boxes);
network->set_input_data(input_deltas_id, input_deltas);

View File

@ -241,24 +241,7 @@ public:
const primitive_id reorder_result_id = edgpsi_id + "Reordered";
topology.add(reorder(reorder_result_id, input_info(edgpsi_primitive), format::bfyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_im_info_id, input_im_info);
network->set_input_data(input_anchors_id, input_anchors);

View File

@ -62,24 +62,7 @@ public:
params.imageShape.first,
params.imageShape.second));
cldnn::network::ptr network;
if (params.is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), params.is_caching_test);
network->set_input_data(priors_id, prior_input);

View File

@ -53,24 +53,7 @@ void test_experimental_detectron_roi_feature_extractor_gpu_fp32_one_level(bool i
topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs));
topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_rois_id, roi_input);
network->set_input_data(input_level_1_id, level_1);

View File

@ -159,22 +159,7 @@ TEST(experimental_detectron_topk_rois_gpu_test, export_import) {
rois_num));
topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, test_data_type));
cldnn::network::ptr network;
{
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), true);
network->set_input_data(input_rois_id, roi_input);
network->set_input_data(input_indices_id, roi_indices);

View File

@ -518,24 +518,7 @@ void test_extract_image_patches_gpu_basic5(bool is_caching_test) {
topology.add(input_layout("Input0", input->get_layout()));
topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input);
auto outputs = network->execute();

View File

@ -85,24 +85,7 @@ public:
tp.add(reorder("output", input_info("eye"), oupput_fmt, type_to_data_type<OutputType>::value));
}
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine_, tp);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine_);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine_);
}
} else {
network = std::make_shared<cldnn::network>(engine_, tp);
}
cldnn::network::ptr network = get_network(engine_, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network->execute();

View File

@ -30,24 +30,7 @@ inline void DoTest(engine& engine,
gather_elements("gather_elements", input_info("InputData"), input_info("InputIndices"), input1->get_layout().format, output_tensor, axis)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("InputData", input0);
network->set_input_data("InputIndices", input1);

View File

@ -1938,24 +1938,7 @@ void test_gather_gpu_u8_322_axisF(bool is_caching_test) {
topology.add(
gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1}));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("InputDictionary", input1);
network->set_input_data("InputText", input2);

View File

@ -39,24 +39,7 @@ inline void DoTestBase(engine& engine,
topology.add(input_layout("InputIndices", input1->get_layout()));
topology.add(gather_nd_inst);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("InputData", input0);
network->set_input_data("InputIndices", input1);

View File

@ -213,24 +213,7 @@ public:
const primitive_id reorder_result_id = result_id + "_reordered";
topology.add(reorder(reorder_result_id, input_info(result_id), plain_layout, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(step_id, step_input);
network->set_input_data(parent_id, parent_input);

View File

@ -95,7 +95,7 @@ protected:
public:
virtual ~GemmGPUTest() {}
void test() {
void test(bool is_caching_test = false) {
fill_gemm_params();
@ -124,12 +124,31 @@ public:
tp.add(g);
tp.add(reorder("output", input_info("gemm_output"), format::bfyx, data_types::f32));
network network(engine, tp);
for (auto &input : network_inputs) {
network.set_input_data(input.first, input.second);
cldnn::network::ptr network;
if (is_caching_test) {
std::cout << "cached" << std::endl;
membuf mem_buf;
{
cldnn::network _network(engine, tp);
process_program(_network.get_program());
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, tp);
process_program(network->get_program());
}
auto outputs = network.execute();
process_program(network.get_program());
for (auto &input : network_inputs) {
network->set_input_data(input.first, input.second);
}
auto outputs = network->execute();
auto output = outputs.at("output").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -160,7 +179,7 @@ class GemmGPUTestRandom : public GemmGPUTest {
auto &v = input_data[i];
v.resize(size);
for(size_t i = 0; i < size; ++i) {
v[i] = generate_random_value() / 10.f;
v[i] = generate_random_value() / 20.f;
}
}
}
@ -233,12 +252,13 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(false), ::testing::Values(true),
::testing::Values(1.0f), ::testing::Values(0.0f)));
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
template <typename T>
void test_basic_bfyx_t2_inplace_crop_with_pad(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 3 } });
auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
std::vector<float> input_data = {
std::vector<T> input_data = {
1.f, -2.f, 3.f, -4.f,
5.f, 6.f, 1.f, 2.f,
3.f, 3.f, 2.f, -1.f,
@ -248,13 +268,13 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
3.f, 3.f, 2.f, -1.f,
};
std::vector<float> input_data2 = {
std::vector<T> input_data2 = {
2.f, 5.f, -4.f, -7.f,
};
set_values(input, input_data);
set_values(input2, input_data2);
std::vector<float> out_data = {
std::vector<T> out_data = {
8.f, 22.f, 20.f
};
@ -274,13 +294,13 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input", input);
network.set_input_data("input2", input2);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("input2", input2);
auto outputs = network->execute();
auto output = outputs.at("output").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
ASSERT_EQ(output_ptr.size(), (uint32_t)3);
for (uint32_t i = 0; i < out_data.size(); ++i) {
@ -288,6 +308,10 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
}
}
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
test_basic_bfyx_t2_inplace_crop_with_pad<float>(false);
}
TEST(gemm_gpu, dynamic) {
auto& engine = get_test_engine();
ov::Shape in1_shape = { 1, 1, 3, 4 };
@ -745,7 +769,7 @@ INSTANTIATE_TEST_SUITE_P(
GemmGPUTestRandom,
::testing::Combine(
::testing::Values(std::vector<std::vector<int32_t>>{{ 5, 1, 500, 9 }, { 5, 1, 1, 500 }}),
::testing::Values(std::vector<std::vector<float>>{{}, {}}),
::testing::Values(std::vector<std::vector<float>>{{}, {}}),
::testing::ValuesIn(planar_formats),
::testing::ValuesIn(float_types),
::testing::Values(std::vector<float>{}),
@ -1182,7 +1206,7 @@ public:
return (x % x_size) * x_pitch + (y % y_size) * y_pitch + (f % f_num) * f_pitch + (b % b_num) * b_pitch;
}
void execute(gemm_params& p) {
void execute(gemm_params& p, bool is_caching_test = false) {
auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
return;
@ -1294,13 +1318,13 @@ public:
#endif
cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_bfyx", gemm_impl} }));
network network(engine, topology, cfg);
network.set_input_data("input0", input0_mem);
network.set_input_data("input1", input1_mem);
cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input0", input0_mem);
network->set_input_data("input1", input1_mem);
if (p.beta != 0) {
network.set_input_data("input2", input2_mem);
network->set_input_data("input2", input2_mem);
}
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("reorder_bfyx").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -1649,4 +1673,50 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp16_tiled_nn_broadcast_tests, ::testing
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(GemmGPUTest, basic_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(GemmGPUTestRandom, basic_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
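ASSERT_NO_FATAL_FAILURE is what makes a fatal ASSERT_* inside the shared test() body abort the cached variant as well; on its own, a fatal failure only aborts the helper function it occurs in. A standalone gtest illustration (names are hypothetical):
#include <gtest/gtest.h>
// A fatal ASSERT_* aborts only check_positive itself...
static void check_positive(int v) {
    ASSERT_GT(v, 0);
}
TEST(WrapperDemo, PropagatesFatalFailure) {
    // ...but the wrapper promotes it to a fatal failure of this test, so
    // nothing after this line runs when check_positive fails.
    ASSERT_NO_FATAL_FAILURE(check_positive(-1));
}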
#ifdef ENABLE_ONEDNN_FOR_GPU
TEST_P(gemm_int8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_uint8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_uint8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_uint8_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
#else
TEST_P(gemm_int8_transposition_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_leftovers_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_combo_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_int8_slm_combo_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_nn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_nt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_tn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_tt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp32_tiled_nn_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_nn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_nt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_tn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_tt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
TEST_P(gemm_fp16_tiled_nn_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
#endif // ENABLE_ONEDNN_FOR_GPU
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad_cached) {
test_basic_bfyx_t2_inplace_crop_with_pad<float>(true);
}
} // namespace

View File

@ -355,24 +355,7 @@ public:
const primitive_id reorder_result_id = generate_proposals_id + "Reordered";
topology.add(reorder(reorder_result_id, input_info(generate_proposals_id), format::bfyx, data_type));
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(input_im_info_id, input_im_info);
network->set_input_data(input_anchors_id, input_anchors);

View File

@ -7,6 +7,12 @@
#include "intel_gpu/primitives/grid_sample.hpp"
#include "test_utils/test_utils.h"
#ifdef RUN_ALL_MODEL_CACHING_TESTS
#define RUN_CACHING_TEST false, true
#else
#define RUN_CACHING_TEST false
#endif
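The macro folds the caching mode into each test's parameter Combine; it expands roughly as:
// With RUN_ALL_MODEL_CACHING_TESTS defined, the bool axis covers both modes:
//   testing::Values(RUN_CACHING_TEST)  ==>  testing::Values(false, true)
// Otherwise only the non-cached variant is generated:
//   testing::Values(RUN_CACHING_TEST)  ==>  testing::Values(false)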
using namespace cldnn;
using namespace tests;
@ -24,7 +30,7 @@ struct grid_sample_test_inputs {
};
template <class TD, class TG>
using grid_sample_test_params = std::tuple<grid_sample_test_inputs<TD, TG>, format::type>;
using grid_sample_test_params = std::tuple<grid_sample_test_inputs<TD, TG>, format::type, bool>;
template <class T>
float getError();
@ -45,7 +51,8 @@ public:
void test() {
format::type fmt;
grid_sample_test_inputs<TD, TG> p;
std::tie(p, fmt) = testing::TestWithParam<grid_sample_test_params<TD, TG>>::GetParam();
bool is_caching_test;
std::tie(p, fmt, is_caching_test) = testing::TestWithParam<grid_sample_test_params<TD, TG>>::GetParam();
auto& engine = get_test_engine();
const auto data_data_type = type_to_data_type<TD>::value;
@ -68,10 +75,10 @@ public:
topology.add(grid_sample("grid_sample", { input_info("reordered_data"), input_info("reordered_grid") }, p.attributes));
topology.add(reorder("plane_grid_sample", input_info("grid_sample"), plane_format, data_data_type));
network network(engine, topology);
network.set_input_data("data", data);
network.set_input_data("grid", grid);
const auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("data", data);
network->set_input_data("grid", grid);
const auto outputs = network->execute();
ASSERT_EQ(outputs.size(), std::size_t(1));
ASSERT_EQ(outputs.begin()->first, "plane_grid_sample");
@ -88,11 +95,13 @@ public:
static std::string PrintToStringParamName(const testing::TestParamInfo<grid_sample_test_params<TD, TG>>& info) {
format::type fmt;
grid_sample_test_inputs<TD, TG> p;
std::tie(p, fmt) = info.param;
bool is_caching_test;
std::tie(p, fmt, is_caching_test) = info.param;
std::ostringstream result;
result << "TestName=" << p.test_name << ";";
result << "Format=" << fmt_to_str(fmt);
result << "Format=" << fmt_to_str(fmt) << ";";
result << "Cached=" << bool_to_str(is_caching_test) << ";";
return result.str();
}
};
@ -673,13 +682,23 @@ TEST_P(grid_sample_gpu_test_FLOAT16_FLOAT16, test) {
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_float_float,
grid_sample_gpu_test_float_float,
testing::Combine(testing::ValuesIn(getParamsToCheckLayouts<float, float>()),
testing::ValuesIn(layout_formats)),
testing::ValuesIn(layout_formats),
testing::Values(RUN_CACHING_TEST)),
grid_sample_gpu_test_float_float::PrintToStringParamName);
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_FLOAT16_FLOAT16,
grid_sample_gpu_test_FLOAT16_FLOAT16,
testing::Combine(testing::ValuesIn(getParamsToCheckLogic<FLOAT16, FLOAT16>()),
testing::Values(format::bfyx)),
testing::Values(format::bfyx),
testing::Values(RUN_CACHING_TEST)),
grid_sample_gpu_test_FLOAT16_FLOAT16::PrintToStringParamName);
#ifndef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_FLOAT16_FLOAT16_cached,
grid_sample_gpu_test_FLOAT16_FLOAT16,
testing::Combine(testing::ValuesIn(getNearestParamsOddDimensionsOuterGrids<FLOAT16, FLOAT16>()),
testing::Values(format::bfyx),
testing::Values(true)),
grid_sample_gpu_test_FLOAT16_FLOAT16::PrintToStringParamName);
#endif
} // namespace

View File

@ -73,24 +73,7 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test)
input_primitive_maps, output_primitive_maps, back_edges, 8)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->set_input_data("trip_count", trip_count_mem);
@ -191,25 +174,7 @@ void test_loop_gpu_basic_concat(bool is_caching_test)
input_primitive_maps, output_primitive_maps, back_edges, trip_count)
);
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, topology);
}
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->set_input_data("trip_count", trip_count_mem);
network->set_input_data("initial_condition", initial_condition_mem);
@ -349,25 +314,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test)
/////////////////////////////////
// network execution
/////////////////////////////////
cldnn::network::ptr network;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, main_topology);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
}
} else {
network = std::make_shared<cldnn::network>(engine, main_topology);
}
cldnn::network::ptr network = get_network(engine, main_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->set_input_data("trip_count", trip_count_mem);
network->set_input_data("initial_condition", initial_condition_mem);

View File

@ -11,7 +11,8 @@
using namespace cldnn;
using namespace ::tests;
TEST(lrn_fp32_gpu, basic) {
template <typename T>
void test_fp32_basic(bool is_caching_test) {
// input : 1x16x1x1
// Output : 1x16x1x1
auto& engine = get_test_engine();
@ -22,11 +23,9 @@ TEST(lrn_fp32_gpu, basic) {
const size_t x = 1;
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<float> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return n++;
});
std::vector<T> inputVals(b * f * y * x);
T n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
set_values(input, inputVals);
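The move from a static local to a mutable by-value capture matters once the same test body can run twice (plain and cached): a static counter keeps incrementing across invocations, so a second run would generate different input data. A standalone sketch, assuming nothing beyond the standard library:
#include <algorithm>
#include <vector>
// Static local: state is shared across calls, so a second call continues
// counting. First call yields {0,1,2,3}, second call {4,5,6,7}.
std::vector<float> fill_static() {
    std::vector<float> v(4);
    std::generate(v.begin(), v.end(), []() {
        static float n = 0;
        return n++;
    });
    return v;
}
// Mutable by-value capture: the counter is recreated per call, so every
// call yields {0,1,2,3}.
std::vector<float> fill_mutable() {
    std::vector<float> v(4);
    float n = 0;
    std::generate(v.begin(), v.end(), [n]() mutable { return n++; });
    return v;
}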
@ -38,11 +37,11 @@ TEST(lrn_fp32_gpu, basic) {
float beta = 1.f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -60,7 +59,12 @@ TEST(lrn_fp32_gpu, basic) {
}
}
TEST(lrn_fp32_gpu, basic2) {
TEST(lrn_fp32_gpu, basic) {
test_fp32_basic<float>(false);
}
template <typename T>
void test_fp32_basic2(bool is_caching_test) {
// input : 1x16x1x1
// Output : 1x16x1x1
auto& engine = get_test_engine();
@ -71,11 +75,9 @@ TEST(lrn_fp32_gpu, basic2) {
const size_t x = 1;
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<float> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return n++;
});
std::vector<T> inputVals(b * f * y * x);
T n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
set_values(input, inputVals);
@ -87,11 +89,11 @@ TEST(lrn_fp32_gpu, basic2) {
float beta = 1.f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -109,7 +111,12 @@ TEST(lrn_fp32_gpu, basic2) {
}
}
TEST(lrn_fp16_gpu, basic1) {
TEST(lrn_fp32_gpu, basic2) {
test_fp32_basic2<float>(false);
}
template <typename T>
void test_fp16_basic1(bool is_caching_test) {
// input : 1x16x1x1
// Output : 1x16x1x1
auto& engine = get_test_engine();
@ -120,11 +127,9 @@ TEST(lrn_fp16_gpu, basic1) {
const size_t x = 1;
auto input = engine.allocate_memory({ data_types::f16, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<half_t> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return half_t(n++);
});
std::vector<T> inputVals(b * f * y * x);
float n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return T(n++); });
set_values(input, inputVals);
@ -136,11 +141,11 @@ TEST(lrn_fp16_gpu, basic1) {
float beta = 1.f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
@ -158,7 +163,12 @@ TEST(lrn_fp16_gpu, basic1) {
}
}
TEST(lrn_fp32_gpu, basic3) {
TEST(lrn_fp16_gpu, basic1) {
test_fp16_basic1<half_t>(false);
}
template <typename T>
void test_fp32_basic3(bool is_caching_test) {
// input : 2x16x4x4
// Output : 2x16x4x4
auto& engine = get_test_engine();
@ -169,11 +179,9 @@ TEST(lrn_fp32_gpu, basic3) {
const size_t x = 4;
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
std::vector<float> inputVals(b * f * y * x);
std::generate(inputVals.begin(), inputVals.end(), []() {
static float n = 0;
return n++;
});
std::vector<T> inputVals(b * f * y * x);
T n = 0;
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
set_values(input, inputVals);
@ -185,11 +193,11 @@ TEST(lrn_fp32_gpu, basic3) {
float beta = 0.75f;
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("lrn").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
@ -249,3 +257,24 @@ TEST(lrn_fp32_gpu, basic3) {
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i])) << i;
}
}
TEST(lrn_fp32_gpu, basic3) {
test_fp32_basic3<float>(false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(lrn_fp32_gpu, basic_cached) {
test_fp32_basic<float>(true);
}
TEST(lrn_fp32_gpu, basic2_cached) {
test_fp32_basic2<float>(true);
}
TEST(lrn_fp16_gpu, basic1_cached) {
test_fp16_basic1<half_t>(true);
}
#endif
TEST(lrn_fp32_gpu, basic3_cached) {
test_fp32_basic3<float>(true);
}

View File

@ -191,7 +191,7 @@ void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell,
template<typename T>
void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
bool hasBias = true, bool hasHidden = true) {
bool hasBias, bool hasHidden, bool is_caching_test = false) {
int min_random = -2, max_random = 2;
VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
@ -244,13 +244,13 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
topology.add(lstm_gemm("lstm_gemm", input_info("input"), "weights", "recurrent", hasBias ? "biases" : "", hasHidden ? "hidden" : ""));
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
if (hasHidden) {
network.set_input_data("hidden", hidden);
network->set_input_data("hidden", hidden);
}
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
auto output = outputs.begin()->second.get_memory();
@ -264,8 +264,8 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
template<typename T>
void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_size,
int /* input_size */, int hidden_size, bool hasCell = true,
T clip_threshold = (T)0.f, bool input_forget = false) {
int /* input_size */, int hidden_size, bool hasCell,
T clip_threshold, bool input_forget, bool is_caching_test = false) {
// tempGEMM = [ 1, direction, batch, 4 * hidden_size ] input
// cell = [ 1, direction, batch, hidden_size ] optional
// output = [ 2, direction, batch, hidden_size ] output concat[hidden, cell]
@ -307,13 +307,13 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_
}
topology.add(lstm_elt("lstm_elt", input_info("tempGEMM"), hasCell ? "cell" : "", clip_threshold, input_forget));
network network(engine, topology);
network.set_input_data("tempGEMM", tempGEMM);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("tempGEMM", tempGEMM);
if (hasCell) {
network.set_input_data("cell", cell);
network->set_input_data("cell", cell);
}
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
auto output = outputs.begin()->second.get_memory();
@ -390,7 +390,7 @@ void generate_lstm_topology(topology& t, memory::ptr input, memory::ptr hidden,
template<typename T>
void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true) {
bool hasBias, bool hasInitialHidden, bool hasInitialCell, bool is_caching_test = false) {
std::cout << "Input Size = " << input_size << " Hidden Size = " << hidden_size << " Sequence Len = " << sequence_len << " Batch Size = " << batch_size << std::endl;
int min_random = -2, max_random = 2;
VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
@ -430,11 +430,11 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
generate_lstm_topology(topology, input, hidden, cell, weights, recurrent, biases, sequence_len,
hasBias, hasInitialHidden, hasInitialCell);
network network(engine, topology);
network.set_input_data("input", input);
if (hasInitialHidden) network.set_input_data("hidden", hidden);
if (hasInitialCell) network.set_input_data("cell", cell);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
if (hasInitialHidden) network->set_input_data("hidden", hidden);
if (hasInitialCell) network->set_input_data("cell", cell);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
@ -457,8 +457,8 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
// -------------------------------------------------------
template<typename T>
void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true,
T clip_threshold = 0, bool input_forget = false) {
bool hasBias, bool hasInitialHidden, bool hasInitialCell,
T clip_threshold, bool input_forget, bool is_caching_test = false) {
std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
int min_random = -2, max_random = 2;
@ -596,14 +596,14 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
prev_lstm_id = lstm_id;
}
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
for (int i = 0; i < layers; ++i) {
std::string sid = get_string_id(i);
if (hasInitialHidden) network.set_input_data("hidden" + sid, hidden[i]);
if (hasInitialCell) network.set_input_data("cell" + sid, cell[i]);
if (hasInitialHidden) network->set_input_data("hidden" + sid, hidden[i]);
if (hasInitialCell) network->set_input_data("cell" + sid, cell[i]);
}
auto outputs = network.execute();
auto outputs = network->execute();
{
ASSERT_EQ(outputs.size(), size_t(1));
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
@ -637,7 +637,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
// -------------------------------------------------------
template<typename T>
void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions) {
void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions, bool is_caching_test = false) {
int layers = 1;
int sequence_len = 4;
int batch_size = 3;
@ -722,12 +722,12 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir
topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0}));
}
network network(engine, topology);
network.set_input_data("input", input);
network.set_input_data("hidden", hidden);
network.set_input_data("cell", cell);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
network->set_input_data("hidden", hidden);
network->set_input_data("cell", cell);
auto outputs = network.execute();
auto outputs = network->execute();
uint32_t ref_num_output_primitives = 1; // Output will return at least 1 primitive
if (emit_last_cell) {
@ -798,7 +798,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir
// -------------------------------------------------------
template<typename T>
void lstm_gpu_format_test(const cldnn::format& format, int directions) {
void lstm_gpu_format_test(const cldnn::format& format, int directions, bool is_caching_test = false) {
int layers = 1;
int sequence_len = 6;
int batch_size = 3;
@ -886,13 +886,14 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0}));
}
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
std::map<primitive_id, network_output> outputs;
network.set_input_data("input", input);
network.set_input_data("hidden", hidden);
network.set_input_data("cell", cell);
outputs = network.execute();
network->set_input_data("input", input);
network->set_input_data("hidden", hidden);
network->set_input_data("cell", cell);
outputs = network->execute();
uint32_t ref_num_output_primitives = 1; // Output will return at least 1 primitive
@ -979,7 +980,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
// -------------------------------------------------------
template<typename T>
void lstm_gpu_users_test() {
void lstm_gpu_users_test(bool is_caching_test = false) {
int sequence_len = 2;
int batch_size = 1;
int input_size = 1;
@ -1052,13 +1053,14 @@ void lstm_gpu_users_test() {
std::vector<input_info> output_ids_offsets { input_info("lstm"), input_info("hidden") };
topology.add(concatenation("concatenation", output_ids_offsets, 1));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
std::map<primitive_id, network_output> outputs;
network.set_input_data("input", input);
network.set_input_data("hidden", hidden);
network.set_input_data("cell", cell);
outputs = network.execute();
network->set_input_data("input", input);
network->set_input_data("hidden", hidden);
network->set_input_data("cell", cell);
outputs = network->execute();
// check if the number of returned primitives matches the expected number of output primitives
ASSERT_EQ(size_t(1), outputs.size());
@ -1081,9 +1083,9 @@ void lstm_gpu_users_test() {
template<typename T>
void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int direction,
int batch_size, int input_size, int hidden_size,
bool has_bias = true, bool has_initial_hidden = true,
bool has_initial_cell = true, float clip_threshold = 0,
bool input_forget = false)
bool has_bias, bool has_initial_hidden,
bool has_initial_cell, float clip_threshold,
bool input_forget, bool is_caching_test = false)
{
std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
@ -1210,14 +1212,14 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
prev_node_id = output_crop_id;
}
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
for (int i = 0; i < layers; ++i) {
std::string sid = get_string_id(i);
if (has_initial_hidden) network.set_input_data("hidden" + sid, hidden[i]);
if (has_initial_cell) network.set_input_data("cell" + sid, cell[i]);
if (has_initial_hidden) network->set_input_data("hidden" + sid, hidden[i]);
if (has_initial_cell) network->set_input_data("cell" + sid, cell[i]);
}
auto outputs = network.execute();
auto outputs = network->execute();
{
ASSERT_EQ(outputs.size(), size_t(1));
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
@ -1254,7 +1256,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
template<typename T>
void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
int directions, size_t layers, size_t chains, int sequence_len,
const lstm_output_selection& output_selection)
const lstm_output_selection& output_selection, bool is_caching_test = false)
{
int min_random = -2, max_random = 2;
bool has_bias = false;
@ -1553,15 +1555,15 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
}
// Creating the network from the topology designed above
cldnn::network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
for (size_t layer = 0; layer < layers; layer++) {
std::string sid = get_string_id(layer);
if (has_initial_hidden) network.set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
if (has_initial_cell) network.set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
if (has_initial_hidden) network->set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
if (has_initial_cell) network->set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
}
auto outputs = network.execute();
auto outputs = network->execute();
for (auto itr = outputs.begin(); itr != outputs.end(); itr++)
{
auto output_layout = itr->second.get_memory()->get_layout();
@ -1666,23 +1668,23 @@ TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32) {
// LSTM ELT Tests
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, 1);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, 1);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f32) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false);
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false, 0.f, false);
}
TEST(lstm_custom_gpu, generic_lstm_custom_f32) {
@ -1720,35 +1722,35 @@ TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32) {
// generic_lstm_gpu_test parameters:
// layers, sequence, dir, batch, input, hidden, bias, initial_h, initial_cell, threshold, coupled_input_forget
TEST(lstm_gpu, generic_lstm_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true);
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false);
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false, 0, false);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32) {
@ -1765,46 +1767,46 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32) {
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f32) {
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true);
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true, 0, false);
}
// bidirectional support
TEST(lstm_gpu, generic_lstm_bi_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true);
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true, 0, false);
}
// multi-layer support
TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32) {
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_f32) {
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_bi_f32) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
}
// optional outputs support
@ -1864,11 +1866,11 @@ TEST(lstm_gpu, lstm_users_f32) {
// Test for LSTM with concatenated input
TEST(lstm_gpu, generic_lstm_concatenated_input) {
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true);
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer) {
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true);
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true, 0, false);
}
// test for LSTM with chain and stack (multilayer)
@ -1938,55 +1940,55 @@ TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16) {
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, 1);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, 1);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, false);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false);
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false, 0.f, false);
}
TEST(lstm_gpu, generic_lstm_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false, 0, false);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16) {
@ -2003,37 +2005,396 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16) {
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true, 0, false);
}
// bidirectional support
TEST(lstm_gpu, generic_lstm_bi_bias_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false);
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false, 0, false);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true);
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true, 0, false);
}
// multi-layer support
TEST(lstm_gpu, generic_lstm_stacked_seq_f16) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true, 0, false);
}
TEST(lstm_gpu, generic_lstm_stacked_bi_f16) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true);
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
}
// TODO: Add tests for the following:
// integration testing using multi-layer and chained LSTMs
// LSTMs single input
// optional activation list
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, false, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, false, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_test_f32_cached) {
generic_lstm_gemm_gpu_test<float>(5, 1, 1, 1024, 1024, true, true, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 256, 2, false, true, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, true, false, true);
}
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32_cached) {
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, false, false, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, true, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, true, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f32_cached) {
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false, 0.f, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, true, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, true, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_hidden_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, false, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, false, true, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, true, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, true, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_hidden_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, false, false, true);
}
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32_cached) {
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, false, false, true);
}
TEST(lstm_gpu, generic_lstm_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false, 0, false, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0, true);
}
TEST(lstm_gpu, generic_lstm_input_forget_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1, true);
}
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32_cached) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f32_cached) {
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32_cached) {
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32_cached) {
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_f32_cached) {
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_bi_f32_cached) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32_cached) {
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, output_test_sequence_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence, 1, true);
}
TEST(lstm_gpu, output_test_hidden_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden, 1, true);
}
TEST(lstm_gpu, output_test_hidden_cell_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 1, true);
}
TEST(lstm_gpu, output_test_sequence_cell_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 1, true);
}
TEST(lstm_gpu, output_test_sequence_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence, 2, true);
}
TEST(lstm_gpu, output_test_hidden_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden, 2, true);
}
TEST(lstm_gpu, output_test_hidden_cell_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 2, true);
}
TEST(lstm_gpu, output_test_sequence_cell_bi_f32_cached) {
lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 2, true);
}
TEST(lstm_gpu, lstm_gpu_format_bfyx_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::bfyx, 1, true);
}
TEST(lstm_gpu, lstm_gpu_format_bfyx_bi_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::bfyx, 2, true);
}
TEST(lstm_gpu, lstm_gpu_format_fyxb_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::fyxb, 1, true);
}
TEST(lstm_gpu, lstm_gpu_format_fyxb_bi_f32_cached) {
lstm_gpu_format_test<float>(cldnn::format::fyxb, 2, true);
}
TEST(lstm_gpu, lstm_users_f32_cached) {
lstm_gpu_users_test<float>(true);
}
TEST(lstm_gpu, generic_lstm_concatenated_input_cached) {
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer_cached) {
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_chained_unidirectional_f32_cached) {
lstm_gpu_chain_test<float>(1, 2, 4, 1, 1, 2, 1, lstm_output_selection::sequence_cell, true);
}
TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32_cached) {
lstm_gpu_chain_test<float>(1, 2, 4, 2, 1, 1, 1, lstm_output_selection::sequence_cell, true);
}
TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32_cached) {
lstm_gpu_chain_test<float>(2, 2, 4, 2, 1, 2, 5, lstm_output_selection::sequence_cell, true);
}
TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32_cached) {
lstm_gpu_chain_test<float>(2, 2, 4, 2, 4, 2, 5, lstm_output_selection::sequence_cell, true);
}
// FP16 half-precision tests
TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, true, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, false, true);
}
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16_cached) {
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, false, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, true, true);
}
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, true, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_test_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, false, true);
}
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16_cached) {
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false, 0.f, false, true);
}
TEST(lstm_gpu, generic_lstm_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false, 0, false, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0, true);
}
TEST(lstm_gpu, generic_lstm_input_forget_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1, true);
}
TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1, true);
}
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16_cached) {
default_offset_type = lstm_weights_order::ifoz;
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
default_offset_type = lstm_weights_order::iofz;
}
TEST(lstm_gpu, generic_lstm_canonical_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true, 0, false, true);
}
// bidirectional support
TEST(lstm_gpu, generic_lstm_bi_bias_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true, 0, false, true);
}
TEST(lstm_gpu, generic_lstm_stacked_seq_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
}
#endif
TEST(lstm_gpu, generic_lstm_stacked_bi_f16_cached) {
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
}
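The churn in the hunks above is mechanical: the LSTM helpers gained a trailing caching flag, so the plain variants now spell out the previously implicit 0, false for threshold and coupled_input_forget, and the _cached variants append true. The updated declaration itself sits outside this diff; inferred from the call sites and the parameter-list comment above, it plausibly reads:

// Presumed shape of the extended helper. The declaration is not part of this
// diff, so parameter names follow the "layers, sequence, dir, batch, input,
// hidden, bias, initial_h, initial_cell, threshold, coupled_input_forget"
// comment above and the exact types may differ.
template <typename T>
void generic_lstm_gpu_test(int layers, int sequence, int dir, int batch,
                           int input_size, int hidden_size,
                           bool has_bias, bool has_initial_hidden, bool has_initial_cell,
                           float clip_threshold, bool input_forget,
                           bool is_caching_test = false);

Keeping generic_lstm_stacked_bi_f16_cached outside the RUN_ALL_MODEL_CACHING_TESTS guard looks deliberate: one representative cached LSTM test still runs in default builds.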

View File

@ -48,7 +48,7 @@ struct matrix_nms_test_inputs {
std::string test_name;
};
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type>;
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type, bool>;
template <class T>
struct matrix_nms_gpu_test : public testing::TestWithParam<matrix_nms_test_params> {
@ -56,7 +56,8 @@ public:
void test() {
format::type blocked_format;
matrix_nms_test_inputs test_inputs;
std::tie(test_inputs, blocked_format) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
bool is_caching_test;
std::tie(test_inputs, blocked_format, is_caching_test) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
const auto data_type = type_to_data_type<T>::value;
const auto plain_format = format::bfyx;
@ -106,11 +107,12 @@ public:
attrs));
topology.add(reorder("matrix_nms", input_info("reordered_matrix_nms"), plain_format, data_type));
network network(engine, topology);
network.set_input_data("boxes", boxes);
network.set_input_data("scores", scores);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("boxes", boxes);
network->set_input_data("scores", scores);
auto outputs = network->execute();
auto output = outputs.at("matrix_nms").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
@ -124,14 +126,16 @@ public:
ASSERT_NEAR(expected_output[i], output_ptr[i], THRESHOLD);
}
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
ASSERT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
}
if (!is_caching_test) {
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
ASSERT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
}
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
ASSERT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
ASSERT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
}
}
}
@ -158,7 +162,8 @@ public:
result << "Normalized=" << bool_to_str(test_inputs.normalized) << "_";
result << "sort_result_type=" << sort_res_type_str << "_";
result << "decay_function=" << decay_function_str << "_";
result << "Format=" << fmt_to_str(std::get<1>(info.param));
result << "Format=" << fmt_to_str(std::get<1>(info.param)) << "_";
result << "Cached=" << bool_to_str(std::get<2>(info.param));
if (!test_inputs.test_name.empty())
result << "_TN=" << test_inputs.test_name;
@ -630,6 +635,12 @@ const std::vector<format::type> layout_formats = {format::bfyx,
format::bs_fs_yx_bsv32_fsv32,
format::bs_fs_yx_bsv32_fsv16};
#ifdef RUN_ALL_MODEL_CACHING_TESTS
const std::vector<bool> run_caching_test = {false, true};
#else
const std::vector<bool> run_caching_test = {false};
#endif
#define INSTANTIATE_MATRIX_NMS_TEST_SUITE(input_type, func) \
using matrix_nms_gpu_test_##input_type##func = matrix_nms_gpu_test<input_type>; \
TEST_P(matrix_nms_gpu_test_##input_type##func, test) { \
@ -637,7 +648,8 @@ const std::vector<format::type> layout_formats = {format::bfyx,
} \
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_##input_type##func, \
matrix_nms_gpu_test_##input_type##func, \
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats)), \
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats), \
testing::ValuesIn(run_caching_test)), \
matrix_nms_gpu_test_##input_type##func::PrintToStringParamName);
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_smoke_inputs)
@ -668,6 +680,14 @@ INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_top_k_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_single_box_inputs)
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_no_output_inputs)
#ifndef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_FLOAT16get_matrix_nms_smoke_inputs_cached,
matrix_nms_gpu_test_FLOAT16get_matrix_nms_smoke_inputs,
testing::Combine(testing::Values(get_matrix_nms_smoke_inputs()), testing::ValuesIn(layout_formats),
testing::Values(true)),
matrix_nms_gpu_test_FLOAT16get_matrix_nms_smoke_inputs::PrintToStringParamName);
#endif
#undef INSTANTIATE_MATRIX_NMS_TEST_SUITE
} // namespace
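Pieced together, the matrix_nms hunks follow one recipe: the caching flag becomes a third axis of the value-parameterized suite, the name generator appends a Cached= token, and the full {false, true} cross product is built only when RUN_ALL_MODEL_CACHING_TESTS is defined, with a single FLOAT16 smoke instantiation kept as cached coverage otherwise. A consolidated sketch of that recipe, with placeholder names (my_suite, my_fixture, my_inputs):

// Consolidated sketch of the pattern used above; placeholder names only.
#ifdef RUN_ALL_MODEL_CACHING_TESTS
const std::vector<bool> run_caching_test = {false, true};  // every case runs twice
#else
const std::vector<bool> run_caching_test = {false};        // caching axis collapses
#endif

INSTANTIATE_TEST_SUITE_P(my_suite,
                         my_fixture,
                         testing::Combine(testing::ValuesIn(my_inputs),
                                          testing::ValuesIn(layout_formats),
                                          testing::ValuesIn(run_caching_test)));

Each test body then receives the flag through std::tie on the widened tuple, exactly as the fixture change at the top of this file shows.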

View File

@ -57,6 +57,8 @@ struct MulticlassNmsParams {
std::vector<T> expected_selected_outputs;
std::vector<T_IND> expected_selected_indices;
std::vector<T_IND> expected_selected_num;
bool is_caching_test;
};
template<typename T, typename T_IND>
@ -170,15 +172,16 @@ public:
topology.add(reorder("multiclass_nms", input_info("multiclass_nms_reordered"), plain_format, data_type));
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(false));
network network(engine, topology, config);
network.set_input_data("input_boxes", input_boxes);
network.set_input_data("input_scores", input_scores);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), param.is_caching_test);
network->set_input_data("input_boxes", input_boxes);
network->set_input_data("input_scores", input_scores);
if (param.has_roisnum) {
network.set_input_data("input_roisnum", input_roisnum);
network->set_input_data("input_roisnum", input_roisnum);
}
const auto outputs = network.execute();
const auto outputs = network->execute();
const auto output_boxes = outputs.at("multiclass_nms").get_memory();
const cldnn::mem_lock<T> output_boxes_ptr(output_boxes, get_test_stream());
@ -209,13 +212,17 @@ public:
get_test_stream());
ASSERT_EQ(output_selected_num_ptr.size(), param.num_batches) << "format=" << fmt_to_str(target_format);
for (size_t i = 0; i < param.num_batches; ++i) {
ASSERT_EQ(param.expected_selected_num[i], output_selected_num_ptr[i])
<< "format=" << fmt_to_str(target_format) << " i=" << i;
if (!param.is_caching_test) {
for (size_t i = 0; i < param.num_batches; ++i) {
ASSERT_EQ(param.expected_selected_num[i], output_selected_num_ptr[i])
<< "format=" << fmt_to_str(target_format) << " i=" << i;
}
}
for (size_t box = 0; box < dim; ++box) {
ASSERT_EQ(param.expected_selected_indices[box], output_selected_indices_ptr[box]) << "box=" << box;
if (!param.is_caching_test) {
ASSERT_EQ(param.expected_selected_indices[box], output_selected_indices_ptr[box]) << "box=" << box;
}
for (size_t j = 0; j < 6; ++j) {
const auto idx = box * 6 + j;
@ -266,7 +273,7 @@ TEST_P(multiclass_nms_test_blocked, basic) {
}
template<typename T, typename T_IND>
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams(bool is_caching_test = false) {
std::vector<MulticlassNmsParams<T, T_IND>> params = {
{"by_score",
cldnn::multiclass_nms::sort_result_type::score,
@ -292,7 +299,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 0, 3, -1, -1},
std::vector<T_IND>{4}},
std::vector<T_IND>{4},
is_caching_test},
{"by_class_id",
cldnn::multiclass_nms::sort_result_type::classid,
@ -306,7 +314,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 0, 3, -1, -1},
std::vector<T_IND>{4}},
std::vector<T_IND>{4},
is_caching_test},
{"three_inputs",
cldnn::multiclass_nms::sort_result_type::score,
@ -346,7 +355,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{1, 0, -1, -1, -1, -1,
2, 3, -1, -1, -1, -1},
std::vector<T_IND>{2, 2}},
std::vector<T_IND>{2, 2},
is_caching_test},
{"across_batches_by_score",
cldnn::multiclass_nms::sort_result_type::score,
@ -384,7 +394,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 6, 0, -1, -1, 3, 9, 4, 5, -1, -1},
std::vector<T_IND>{4, 4}},
std::vector<T_IND>{4, 4},
is_caching_test},
{"across_batches_by_class_id",
cldnn::multiclass_nms::sort_result_type::classid,
@ -423,7 +434,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, 0, 3, -1, -1, 4, 5, 6, 9, -1, -1},
std::vector<T_IND>{4, 4}},
std::vector<T_IND>{4, 4},
is_caching_test},
{"normalized",
cldnn::multiclass_nms::sort_result_type::score,
@ -449,7 +461,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 1.00,
1.00, 0.00, 0.00, 0.00, 0.75, 0.00, 0.10, 1.00, 1.10}),
std::vector<T_IND>{3, 0, 1},
std::vector<T_IND>{3}},
std::vector<T_IND>{3},
is_caching_test},
{"identical_boxes",
cldnn::multiclass_nms::sort_result_type::score,
@ -477,7 +490,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{0, -1, -1},
std::vector<T_IND>{1}},
std::vector<T_IND>{1},
is_caching_test},
{"limit_output_size",
cldnn::multiclass_nms::sort_result_type::score,
@ -501,7 +515,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
std::vector<T_IND>{},
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
std::vector<T_IND>{3, 0},
std::vector<T_IND>{2}},
std::vector<T_IND>{2},
is_caching_test},
{"single_box",
cldnn::multiclass_nms::sort_result_type::score,
@ -525,7 +540,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
std::vector<T_IND>{0},
std::vector<T_IND>{1}},
std::vector<T_IND>{1},
is_caching_test},
{"iou_threshold",
cldnn::multiclass_nms::sort_result_type::score,
@ -551,7 +567,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00,
0.00, 1.00, 1.00, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, 0, -1},
std::vector<T_IND>{2}},
std::vector<T_IND>{2},
is_caching_test},
{"iou_and_score_thresholds",
cldnn::multiclass_nms::sort_result_type::score,
@ -577,7 +594,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
getValues<T>({0.00, 0.96, 0.00, 10.00, 1.00, 11.00, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{3, -1, -1},
std::vector<T_IND>{1}},
std::vector<T_IND>{1},
is_caching_test},
{"no_output",
cldnn::multiclass_nms::sort_result_type::score,
@ -607,7 +625,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{-1, -1, -1, -1, -1, -1},
std::vector<T_IND>{0}},
std::vector<T_IND>{0},
is_caching_test},
{"background_class",
cldnn::multiclass_nms::sort_result_type::classid,
@ -648,7 +667,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
std::vector<T_IND>{0, 3, -1, 6, 9, -1},
std::vector<T_IND>{2, 2}},
std::vector<T_IND>{2, 2},
is_caching_test},
{"keep_top_k",
cldnn::multiclass_nms::sort_result_type::classid,
@ -681,7 +701,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),
std::vector<T_IND>{3, 0, 0, 4, 6, 9},
std::vector<T_IND>{3, 3}},
std::vector<T_IND>{3, 3},
is_caching_test},
{"normalized_by_classid",
cldnn::multiclass_nms::sort_result_type::classid,
@ -735,14 +756,15 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
-1, -1, -1, -1, -1, -1,
2, 4, 5, 6, 9, 11,
-1, -1, -1, -1, -1, -1},
std::vector<T_IND>{6, 6}},
std::vector<T_IND>{6, 6},
is_caching_test},
};
return params;
}
template<typename T, typename T_IND>
std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout() {
std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout(bool is_caching_test = false) {
MulticlassNmsParams<T, T_IND> param = {
"blocked_format_three_inputs",
cldnn::multiclass_nms::sort_result_type::score,
@ -798,7 +820,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout() {
std::vector<T_IND>{1, 0, -1, -1, -1, -1,
2, 3, -1, -1, -1, -1},
std::vector<T_IND>{2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
is_caching_test
};
const auto indices_size = param.num_batches * param.num_boxes;
@ -829,8 +852,24 @@ INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
PrintToStringParamName());
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_blocked,
multiclass_nms_test_f32_i32,
multiclass_nms_test_blocked,
::testing::ValuesIn(getParamsForBlockedLayout<float, int32_t>()),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
multiclass_nms_test_f32_i32,
::testing::ValuesIn(getMulticlassNmsParams<float, int32_t>(true)),
PrintToStringParamName());
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
multiclass_nms_test_f16_i64,
::testing::ValuesIn(getMulticlassNmsParams<half_t, int64_t>(true)),
PrintToStringParamName());
#endif
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_blocked_cached,
multiclass_nms_test_blocked,
::testing::ValuesIn(getParamsForBlockedLayout<float, int32_t>(true)),
PrintToStringParamName());
}; // namespace
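multiclass_nms reaches the same end by a different route: instead of widening the gtest tuple, the flag travels inside each MulticlassNmsParams entry. The parameter factories take it with a default of false and stamp it into every case, so a cached instantiation is just a second ValuesIn over the same table. In miniature (signatures as in the diff, bodies elided):

// The factory stamps the caching flag into every case it returns.
template <typename T, typename T_IND>
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams(bool is_caching_test = false);

// Uncached and cached instantiations then share one parameter table.
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
                         multiclass_nms_test_f32_i32,
                         ::testing::ValuesIn(getMulticlassNmsParams<float, int32_t>(true)),
                         PrintToStringParamName());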

View File

@ -104,112 +104,80 @@ void mvn_compute_mean_within_channels(cldnn::memory::ptr output, bool normalize_
}
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) {
template <typename T>
void test_mvn_test_across_channels_outside_sqrt_bfyx(bool is_caching_test) {
// mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace ::tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}});
cldnn::data_types input_data_type = std::is_same<T, FLOAT16>::value ? data_types::f16 : data_types::f32;
tests::set_random_values<float>(input, true, 8, 100);
auto input = engine.allocate_memory({input_data_type, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<T>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<float>(output, false);
mvn_compute_mean_across_channels<T>(output, false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) {
test_mvn_test_across_channels_outside_sqrt_bfyx<float>(false);
}
template <typename T>
void test_mvn_test_across_channels_inside_sqrt_bfyx(bool is_caching_test) {
// mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
auto& engine = get_test_engine();
cldnn::data_types input_data_type = std::is_same<T, FLOAT16>::value ? data_types::f16 : data_types::f32;
auto input = engine.allocate_memory({input_data_type, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<T>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<T>(output, false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx) {
// mvn across channels fp32 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<float>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<float>(output, false);
test_mvn_test_across_channels_inside_sqrt_bfyx<float>(false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_outside_sqrt_fp16) {
// mvn across channels fp16 test with normalize_variance set to false
using namespace cldnn;
using namespace ::tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<FLOAT16>(output, false);
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_fp16) {
test_mvn_test_across_channels_outside_sqrt_bfyx<FLOAT16>(false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16) {
// mvn across channels fp16 test with normalize_variance set to false
using namespace cldnn;
using namespace tests;
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}});
tests::set_random_values<FLOAT16>(input, true, 8, 100);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
auto output = outputs.begin()->second.get_memory();
mvn_compute_mean_across_channels<FLOAT16>(output, false);
test_mvn_test_across_channels_inside_sqrt_bfyx<FLOAT16>(false);
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance) {
@ -666,7 +634,7 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
}
}
void execute(const mvn_basic_test_params& params, engine& eng) {
void execute(const mvn_basic_test_params& params, engine& eng, bool is_caching_test) {
auto& size = params.input_size;
auto& output_pad = params.output_pad;
@ -695,11 +663,11 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
prim.output_paddings = {output_pad};
topo.add(prim);
network net(eng, topo);
cldnn::network::ptr net = get_network(eng, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
net.set_input_data("input", input);
net->set_input_data("input", input);
auto outputs = net.execute();
auto outputs = net->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "mvn");
@ -710,7 +678,7 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
TEST_P(mvn_random_test, random) {
auto& engine = tests::get_test_engine();
this->execute(GetParam(), engine);
this->execute(GetParam(), engine, false);
}
struct mvn_test_case_generator : std::vector<mvn_basic_test_params> {
@ -857,7 +825,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
}
}
void execute(const mvn_basic_test_params& params) {
void execute(const mvn_basic_test_params& params, bool is_caching_test) {
auto& size = params.input_size;
auto& output_pad = params.output_pad;
auto& engine = get_test_engine();
@ -888,10 +856,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn"}));
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} }));
network net(engine, topo, config);
net.set_input_data("input", input);
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto outputs = net.execute();
net->set_input_data("input", input);
auto outputs = net->execute();
auto output = outputs.at("mvn").get_memory();
topology topo_opt;
@ -904,10 +873,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn_opt", "input_to_target_layout"}));
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} }));
network net_opt(engine, topo_opt, config_opt);
net_opt.set_input_data("input", input);
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
auto outputs_opt = net_opt.execute();
net_opt->set_input_data("input", input);
auto outputs_opt = net_opt->execute();
auto output_opt = outputs_opt.at("mvn_opt").get_memory();
auto output_dtype = output->get_layout().data_type;
@ -933,7 +903,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
};
TEST_P(mvn_random_test_bsv32, random) {
this->execute(GetParam());
this->execute(GetParam(), false);
}
struct mvn_test_case_generator_bsv32 : std::vector<mvn_basic_test_params> {
@ -964,3 +934,29 @@ INSTANTIATE_TEST_SUITE_P(mvn_fsv16,
mvn_random_test_bsv32,
testing::ValuesIn(mvn_test_case_generator_bsv32()
.bsv32_tests(format::b_fs_yx_fsv16, data_types::i8)));
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_cached) {
test_mvn_test_across_channels_outside_sqrt_bfyx<float>(true);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_cached) {
test_mvn_test_across_channels_inside_sqrt_bfyx<float>(true);
}
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_fp16_cached) {
test_mvn_test_across_channels_outside_sqrt_bfyx<FLOAT16>(true);
}
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16_cached) {
test_mvn_test_across_channels_inside_sqrt_bfyx<FLOAT16>(true);
}
TEST_P(mvn_random_test, random_cached) {
auto& engine = tests::get_test_engine();
this->execute(GetParam(), engine, true);
}
TEST_P(mvn_random_test_bsv32, random_cached) {
this->execute(GetParam(), true);
}
#endif

View File

@ -128,24 +128,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -206,24 +189,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -294,24 +260,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -430,24 +379,7 @@ struct non_max_suppression_basic : public testing::Test {
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -556,24 +488,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -630,24 +545,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
@ -708,24 +606,7 @@ struct non_max_suppression_basic : public testing::Test {
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr net;
if (is_caching_test) {
membuf mem_buf;
{
cldnn::network _network(engine, topo, config);
std::ostream out_mem(&mem_buf);
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
_network.save(ob);
}
{
std::istream in_mem(&mem_buf);
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
}
} else {
net = std::make_shared<cldnn::network>(engine, topo, config);
}
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto boxes_mem = this->get_boxes_memory(engine);
auto scores_mem = this->get_scores_memory(engine);
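The seven hunks above all collapse the same copy-pasted save/load block into one get_network call. The helper itself lives in the shared test utilities and is not part of this diff; reconstructed from the inline code it replaces, it plausibly looks like this:

// Plausible reconstruction of the shared helper. The signature is inferred
// from the call sites and the body from the inline block removed above; the
// real implementation may differ in details.
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                std::shared_ptr<cldnn::stream> stream,
                                const bool is_caching_test) {
    if (!is_caching_test)
        return std::make_shared<cldnn::network>(engine, topology, config);

    // Round-trip the built network through an in-memory buffer so the test
    // exercises the same serialize/deserialize path as a cached model load.
    membuf mem_buf;
    {
        cldnn::network _network(engine, topology, config);
        std::ostream out_mem(&mem_buf);
        BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
        _network.save(ob);
    }
    std::istream in_mem(&mem_buf);
    BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
    return std::make_shared<cldnn::network>(ib, config, stream, engine);
}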

View File

@ -54,6 +54,57 @@ struct normalize_basic : public testing::Test {
return inputVals;
}
void execute(bool is_caching_test) {
// Input : 1x2x3x3
// Output : 1x2x3x3
auto& engine = get_test_engine();
const unsigned b = 1;
const unsigned f = 2;
const unsigned y = 3;
const unsigned x = 3;
auto input = engine.allocate_memory({this->data_type, format::bfyx, {b, f, y, x}});
auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}});
auto inputVals = this->get_input_values(b, f, y, x);
std::vector<float> weightVals(f);
for (auto& it : weightVals) {
it = 1.f;
}
set_values(input, inputVals);
set_values(weights, weightVals);
topology topology;
topology.add(input_layout("Input0", input->get_layout()));
topology.add(data("Input1", weights));
topology.add(reorder("reordered_Input0", input_info("Input0"), this->format, this->data_type));
topology.add(reorder("reordered_Input1", input_info("Input1"), this->format, data_types::f32));
topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("Input0", input);
auto outputs = network->execute();
auto output = outputs.at("plane_normalize2").get_memory();
if (this->data_type == data_types::f16) {
cldnn::mem_lock<half_t> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_NEAR(expected_results[i], output_ptr[i], 0.001);
}
} else {
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
}
private:
static const std::vector<output_type> get_expected_result(std::true_type) {
static const std::vector<float> result = {0.f,
@ -144,52 +195,23 @@ using format_types = testing::Types<normalize_input_types<format::bfyx, float, f
TYPED_TEST_SUITE(normalize_basic, format_types);
TYPED_TEST(normalize_basic, basic) {
// Input : 1x2x3x3
// Output : 1x2x3x3
auto& engine = get_test_engine();
const unsigned b = 1;
const unsigned f = 2;
const unsigned y = 3;
const unsigned x = 3;
auto input = engine.allocate_memory({this->data_type, format::bfyx, {b, f, y, x}});
auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}});
auto inputVals = this->get_input_values(b, f, y, x);
std::vector<float> weightVals(f);
for (auto& it : weightVals) {
it = 1.f;
}
set_values(input, inputVals);
set_values(weights, weightVals);
topology topology;
topology.add(input_layout("Input0", input->get_layout()));
topology.add(data("Input1", weights));
topology.add(reorder("reordered_Input0", input_info("Input0"), this->format, this->data_type));
topology.add(reorder("reordered_Input1", input_info("Input1"), this->format, data_types::f32));
topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
network network(engine, topology);
network.set_input_data("Input0", input);
auto outputs = network.execute();
auto output = outputs.at("plane_normalize2").get_memory();
if (this->data_type == data_types::f16) {
cldnn::mem_lock<half_t> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_NEAR(expected_results[i], output_ptr[i], 0.001);
}
} else {
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto expected_results = this->get_expected_result();
for (size_t i = 0; i < expected_results.size(); ++i) {
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i]));
}
}
this->execute(false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TYPED_TEST(normalize_basic, basic_cached) {
this->execute(true);
}
#else
template <typename NormalizeInput>
struct normalize_basic_cached : public normalize_basic<NormalizeInput> {
};
using format_types_cached = testing::Types<normalize_input_types<format::bfyx, float, false>>;
TYPED_TEST_SUITE(normalize_basic_cached, format_types_cached);
TYPED_TEST(normalize_basic_cached, basic) {
this->execute(true);
}
#endif

View File

@ -66,7 +66,7 @@ VVVVF<T> one_hot_cpu(VVVVF<T> &input, uint16_t axis,
template <typename T>
void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, tensor shape,
uint16_t one_hot_axis, int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, int output_padding_x = 0) {
uint16_t one_hot_axis, int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x, bool is_caching_test) {
std::vector<tensor::value_type> output_dims = { shape.batch[0], shape.feature[0],
shape.spatial[1], shape.spatial[0] };
int32_t one_hot_limit = output_dims[one_hot_axis];
@ -84,9 +84,9 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
topology.add(input_layout("input", input->get_layout()));
topology.add(one_hot("output", input_info("input"), shape, one_hot_axis, one_hot_limit));
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "output");
@ -130,17 +130,33 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
}
TEST(one_hot_gpu_i32, generic) {
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0);
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, false);
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, false);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, false);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, false);
}
TEST(one_hot_gpu_i64, generic) {
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0);
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, false);
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, false);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, false);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, false);
}
TEST(one_hot_gpu_i32, generic_cached) {
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, true);
#ifdef RUN_ALL_MODEL_CACHING_TESTS
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, true);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, true);
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, true);
}
TEST(one_hot_gpu_i64, generic_cached) {
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, true);
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, true);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, true);
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, true);
#endif
}
TEST(one_hot_gpu_i32, bfzyx_ax4) {


@ -1627,7 +1627,7 @@ public:
template<data_types Data_Type>
void run_test(const std::vector<cldnn::tensor::value_type>& sizes, cldnn::format format_fsv,
const std::string & permute_opt = "permute_tile_8x8_4x4_fsv",
std::vector<uint16_t> permute_order = {});
std::vector<uint16_t> permute_order = {}, bool is_caching_test = false);
};
template<>
@ -1654,7 +1654,7 @@ void TiledPermuteTest::set_random_values<int8_t>(const cldnn::memory::ptr mem) c
template<data_types Data_Type>
void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& sizes, cldnn::format format_fsv,
const std::string & permute_opt, std::vector<uint16_t> permute_order)
const std::string & permute_opt, std::vector<uint16_t> permute_order, bool is_caching_test)
{
// convert half_t to FLOAT16
using type_ = typename data_type_to_type<Data_Type>::type;
@ -1690,9 +1690,9 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
ov::intel_gpu::ImplementationDesc permute_ref = { format_fsv, "permute_ref" };
config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_ref} }));
cldnn::network network_ref(engine, topology_ref, config_ref);
network_ref.set_input_data("input", input);
auto outputs_ref = network_ref.execute();
cldnn::network::ptr network_ref = get_network(engine, topology_ref, config_ref, get_test_stream_ptr(), is_caching_test);
network_ref->set_input_data("input", input);
auto outputs_ref = network_ref->execute();
auto output_ref = outputs_ref.begin()->second.get_memory();
cldnn::mem_lock<type> output_ref_ptr(output_ref, get_test_stream());
@ -1701,9 +1701,9 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
ov::intel_gpu::ImplementationDesc permute_tile_opt = { format_fsv, permute_opt };
config_tile.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_tile_opt} }));
cldnn::network network_tile(engine, topology_ref, config_tile);
network_tile.set_input_data("input", input);
auto outputs_tile = network_tile.execute();
cldnn::network::ptr network_tile = get_network(engine, topology_ref, config_tile, get_test_stream_ptr(), is_caching_test);
network_tile->set_input_data("input", input);
auto outputs_tile = network_tile->execute();
auto output_tile = outputs_tile.begin()->second.get_memory();
cldnn::mem_lock<type> output_tile_ptr(output_tile, get_test_stream());
@ -1920,3 +1920,59 @@ TEST_P(permute_bfzyx_to_bfyxz, combined) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_bfzyx_to_bfyxz", {0, 1, 3, 4, 2});
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(permute_tile_fsv_4d, f16_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f16>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, f32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, i8_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i8>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, i32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_4d, i64_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i64>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, f16_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f16>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, f32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, i8_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i8>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_tile_fsv_5d, i32_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
TEST_P(permute_bfzyx_to_bfyxz, combined_cached) {
auto p = GetParam();
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_bfzyx_to_bfyxz", {0, 1, 3, 4, 2}, true);
}
#endif
TEST_P(permute_tile_fsv_5d, i64_cached) {
auto p = GetParam();
run_test<cldnn::data_types::i64>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
}
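The caching path of the helper sketched after the one_hot hunks only needs an in-memory stream to round-trip the serialized network. A self-contained sketch of such a `membuf`; the actual test utility may differ:
#include <streambuf>
#include <vector>

class membuf : public std::streambuf {
public:
    membuf() : _pos(0) {}

protected:
    // std::ostream writes arrive here one character at a time.
    int_type overflow(int_type ch) override {
        if (ch != traits_type::eof())
            _buf.push_back(static_cast<char>(ch));
        return ch;
    }
    // std::istream peeks at the current character here...
    int_type underflow() override {
        return _pos < _buf.size() ? traits_type::to_int_type(_buf[_pos])
                                  : traits_type::eof();
    }
    // ...and consumes it here.
    int_type uflow() override {
        return _pos < _buf.size() ? traits_type::to_int_type(_buf[_pos++])
                                  : traits_type::eof();
    }

private:
    std::vector<char> _buf;
    std::size_t _pos;
};
// Usage: std::ostream out(&buf); /* save */  then  std::istream in(&buf); /* load */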


@ -1933,11 +1933,12 @@ public:
return "pool";
}
virtual void run_expect(const VVVVVF<output_t>& expected) {
virtual void run_expect(const VVVVVF<output_t>& expected, bool is_caching_test) {
auto& eng = get_test_engine();
auto topo = build_topology(eng);
ExecutionConfig config(ov::intel_gpu::optimize_data(true));
cldnn::network net(eng, topo, config);
cldnn::network::ptr net = get_network(eng, topo, config, get_test_stream_ptr(), is_caching_test);
auto input_size = tensor(batch(batch_num()), feature(input_features()), spatial(input_x(), input_y(), input_z()));
auto input_lay = layout(input_type(),
@ -1956,20 +1957,22 @@ public:
}
set_values(input_mem, input_flat);
net.set_input_data("input", input_mem);
auto result = net.execute();
net->set_input_data("input", input_mem);
auto result = net->execute();
auto out_mem = result.at(output_id()).get_memory();
auto out_lay = out_mem->get_layout();
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
std::string kernel;
for (auto i : net.get_primitives_info()) {
if (i.original_id == "pool") {
kernel = i.kernel_id;
if (!is_caching_test) {
std::string kernel;
for (auto i : net->get_primitives_info()) {
if (i.original_id == "pool") {
kernel = i.kernel_id;
}
}
std::cout << kernel << std::endl;
SCOPED_TRACE("\nkernel: " + kernel);
}
std::cout << kernel << std::endl;
SCOPED_TRACE("\nkernel: " + kernel);
ASSERT_EQ(out_lay.data_type, output_type());
ASSERT_EQ(out_lay.batch(), expected.size());
@ -2115,10 +2118,10 @@ public:
this->set_offsets(o_x, o_y, o_z);
}
void run_random(const pooling_random_test_params& params) {
void run_random(const pooling_random_test_params& params, bool is_caching_test) {
param_set_up(params);
auto reference = calculate_reference();
ASSERT_NO_FATAL_FAILURE(this->run_expect(reference));
ASSERT_NO_FATAL_FAILURE(this->run_expect(reference, is_caching_test));
}
};
@ -2131,22 +2134,22 @@ struct pooling_random_test : public testing::TestWithParam<pooling_random_test_p
TEST_P(pooling_random_test, max_i8) {
auto test_case = max_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test, max_u8) {
auto test_case = max_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test, avg_i8) {
auto test_case = avg_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test, avg_u8) {
auto test_case = avg_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
INSTANTIATE_TEST_SUITE_P(
@ -2243,22 +2246,22 @@ using pooling_random_test_fp16_fp32 = pooling_random_test;
TEST_P(pooling_random_test_fp16_fp32, avg_fp16) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test_fp16_fp32, max_fp16) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test_fp16_fp32, avg_fp32) {
auto test_case = pooling_random_test_base<float, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
TEST_P(pooling_random_test_fp16_fp32, max_fp32) {
auto test_case = pooling_random_test_base<float, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
}
INSTANTIATE_TEST_SUITE_P(
@ -3249,3 +3252,44 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) {
}
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(pooling_random_test, max_i8_cached) {
auto test_case = max_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test, max_u8_cached) {
auto test_case = max_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test, avg_i8_cached) {
auto test_case = avg_pooling_i8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test, avg_u8_cached) {
auto test_case = avg_pooling_u8_random_test();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test_fp16_fp32, avg_fp16_cached) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test_fp16_fp32, max_fp16_cached) {
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
TEST_P(pooling_random_test_fp16_fp32, avg_fp32_cached) {
auto test_case = pooling_random_test_base<float, pooling_mode::average>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST_P(pooling_random_test_fp16_fp32, max_fp32_cached) {
auto test_case = pooling_random_test_base<float, pooling_mode::max>();
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
}


@ -40,7 +40,7 @@ using prior_box_param = std::tuple<format, // Input and ou
template <class InputType, class OutputType>
class PriorBoxGPUTest : public ::testing::TestWithParam<prior_box_param<InputType, OutputType>> {
public:
void SetUp() override {
void execute(bool is_caching_test) {
const auto input_data_type = type_to_data_type<InputType>::value;
const auto output_data_type = type_to_data_type<OutputType>::value;
const auto plain_format = format::bfyx;
@ -92,8 +92,10 @@ public:
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(false));
network network(engine, topo, config);
const auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
const auto outputs = network->execute();
const auto output = outputs.at("prior_box").get_memory();
cldnn::mem_lock<OutputType> output_ptr(output, get_test_stream());
@ -107,7 +109,9 @@ public:
};
using prior_box_test_i32_f32 = PriorBoxGPUTest<int32_t, float>;
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32) {}
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32) {
this->execute(false);
}
INSTANTIATE_TEST_SUITE_P(
prior_box_test_all_formats,
@ -261,4 +265,37 @@ INSTANTIATE_TEST_SUITE_P(
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1
})));
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32_cached) {
this->execute(true);
}
#else
using prior_box_test_i32_f32_cached = PriorBoxGPUTest<int32_t, float>;
TEST_P(prior_box_test_i32_f32_cached, prior_box_test_i32_f32) {
this->execute(true);
}
INSTANTIATE_TEST_SUITE_P(
prior_box_test_four_variances,
prior_box_test_i32_f32_cached,
testing::Combine(
testing::Values(format::bfyx),
testing::Values(std::vector<int32_t>{2, 2}),
testing::Values(std::vector<int32_t>{10, 10}),
testing::Values(
prior_box_attributes{{2.0f}, {5.0f}, {1.5f}, {}, {}, {}, false, false, 0.0f, 0.0f, {0.1, 0.2, 0.3, 0.4}, true, true}),
testing::Values(std::vector<float>{
0.15, 0.15, 0.35, 0.35, 0.0918861, 0.0918861, 0.408114, 0.408114, 0.127526, 0.16835, 0.372474, 0.33165,
0.65, 0.15, 0.85, 0.35,
0.591886, 0.0918861, 0.908114, 0.408114, 0.627526, 0.16835, 0.872474, 0.33165, 0.15, 0.65, 0.35, 0.85,
0.0918861, 0.591886, 0.408114, 0.908114,
0.127526, 0.66835, 0.372474, 0.83165, 0.65, 0.65, 0.85, 0.85, 0.591886, 0.591886, 0.908114, 0.908114,
0.627526, 0.66835, 0.872474, 0.83165,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4
})));
#endif
} // namespace


@ -15,7 +15,8 @@ using namespace ::tests;
//We expect additional reorder to be added in between "weights1" and "reshape1".
//This situation should be handled properly by propagate constants optimization phase
TEST(propagate_constants, copy_dependecies_from_nodes) {
template <typename T>
void test_copy_dependecies_from_nodes(bool is_caching_test) {
auto& engine = get_test_engine();
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
@ -24,8 +25,8 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
auto weights2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
set_values(input, { T(1.1f), T(1.2f), T(1.3f), T(1.4f) });
set_values(weights1, { T(2.1f), T(3.1f) });
set_values(weights2, { 1.1f, 0.1f });
topology topology;
@ -37,10 +38,10 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
topology.add(reorder("reorder1", input_info("reshape1"), layout(data_types::f32, format::byxf, tensor(4))));
topology.add(concatenation("concat", { input_info("reorder1"), input_info("weights2") }, 3));
topology.add(convolution("conv2", { input_info("reorder2") }, { "concat" }));
network network(engine, topology, config);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
float epsilon = 1e-2f;
for (auto& it : outputs) {
@ -48,3 +49,11 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
ASSERT_NEAR(7.8f, output[0], epsilon);
}
}
TEST(propagate_constants, copy_dependecies_from_nodes) {
test_copy_dependecies_from_nodes<FLOAT16>(false);
}
TEST(propagate_constants, copy_dependecies_from_nodes_cached) {
test_copy_dependecies_from_nodes<FLOAT16>(true);
}

View File

@ -15,116 +15,123 @@ template <typename T>
struct pyramid_roi_align_typed_test : testing::Test {
static const data_types data_type = type_to_data_type<T>::value;
using Type = T;
void execute(bool is_caching_test) {
auto& engine = get_test_engine();
const int rois_num = 3;
const int output_size = 2;
const int sampling_points = 2;
const int starting_level = 2;
const int P2_scale = 1;
const int P3_scale = 2;
const int P4_scale = 4;
const int P5_scale = 8;
const int P2_size = 8;
const int P3_size = P2_size * P2_scale / P3_scale;
const int P4_size = P2_size * P2_scale / P4_scale;
const int P5_size = P2_size * P2_scale / P5_scale;
std::vector<Type> rois_data = {
Type(0.f), Type(0.f), Type(1.f), Type(1.f),
Type(0.f), Type(0.f), Type(0.5f), Type(0.5f),
Type(0.5f), Type(0.5f), Type(0.75f), Type(0.75f)
};
std::vector<Type> P2_data = {
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
};
std::vector<Type> P3_data = {
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
};
std::vector<Type> P4_data = {
Type(11.f), Type(19.f),
Type(11.f), Type(19.f),
};
std::vector<Type> P5_data = {
Type(15.f)
};
auto rois_lay = layout(this->data_type, format::bfyx, tensor(batch(rois_num), feature(4)));
auto P2_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P2_size, P2_size));
auto P3_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P3_size, P3_size));
auto P4_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P4_size, P4_size));
auto P5_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P5_size, P5_size));
auto rois_mem = engine.allocate_memory(rois_lay);
auto P2_mem = engine.allocate_memory(P2_lay);
auto P3_mem = engine.allocate_memory(P3_lay);
auto P4_mem = engine.allocate_memory(P4_lay);
auto P5_mem = engine.allocate_memory(P5_lay);
tests::set_values(rois_mem, rois_data);
tests::set_values(P2_mem, P2_data);
tests::set_values(P3_mem, P3_data);
tests::set_values(P4_mem, P4_data);
tests::set_values(P5_mem, P5_data);
topology topo;
topo.add(data("P2", P2_mem));
topo.add(data("P3", P3_mem));
topo.add(data("P4", P4_mem));
topo.add(data("P5", P5_mem));
topo.add(input_layout("rois", rois_lay));
topo.add(pyramid_roi_align("pyramid",
input_info("rois"),
input_info("P2"),
input_info("P3"),
input_info("P4"),
input_info("P5"),
output_size,
sampling_points,
{ P2_scale, P3_scale, P4_scale, P5_scale },
starting_level));
cldnn::network::ptr net = get_network(engine, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
net->set_input_data("rois", rois_mem);
std::vector<float> expected_out = {
// RoI 0,0 - 1,1 from P4
14.f, 18.f, 14.f, 18.f,
// RoI 0,0 - 0.5,0.5 from P3
11.25f, 14.25f, 11.25f, 14.25f,
// RoI 0.5,0.5 - 0.75,0.75 from P2
12.15625f, 13.03125f, 7.40625f, 8.28125f,
};
auto result = net->execute();
auto out_mem = result.at("pyramid").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
ASSERT_EQ(expected_out.size(), out_ptr.size());
for (size_t i = 0; i < expected_out.size(); ++i) {
ASSERT_EQ(expected_out[i], static_cast<float>(out_ptr[i])) << "at i = " << i;
}
}
};
using pyramid_roi_align_types = testing::Types<float, half_t>;
TYPED_TEST_SUITE(pyramid_roi_align_typed_test, pyramid_roi_align_types);
TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels) {
using Type = typename pyramid_roi_align_typed_test<TypeParam>::Type;
auto& engine = get_test_engine();
const int rois_num = 3;
const int output_size = 2;
const int sampling_points = 2;
const int starting_level = 2;
const int P2_scale = 1;
const int P3_scale = 2;
const int P4_scale = 4;
const int P5_scale = 8;
const int P2_size = 8;
const int P3_size = P2_size * P2_scale / P3_scale;
const int P4_size = P2_size * P2_scale / P4_scale;
const int P5_size = P2_size * P2_scale / P5_scale;
std::vector<Type> rois_data = {
Type(0.f), Type(0.f), Type(1.f), Type(1.f),
Type(0.f), Type(0.f), Type(0.5f), Type(0.5f),
Type(0.5f), Type(0.5f), Type(0.75f), Type(0.75f)
};
std::vector<Type> P2_data = {
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
};
std::vector<Type> P3_data = {
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
};
std::vector<Type> P4_data = {
Type(11.f), Type(19.f),
Type(11.f), Type(19.f),
};
std::vector<Type> P5_data = {
Type(15.f)
};
auto rois_lay = layout(this->data_type, format::bfyx, tensor(batch(rois_num), feature(4)));
auto P2_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P2_size, P2_size));
auto P3_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P3_size, P3_size));
auto P4_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P4_size, P4_size));
auto P5_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P5_size, P5_size));
auto rois_mem = engine.allocate_memory(rois_lay);
auto P2_mem = engine.allocate_memory(P2_lay);
auto P3_mem = engine.allocate_memory(P3_lay);
auto P4_mem = engine.allocate_memory(P4_lay);
auto P5_mem = engine.allocate_memory(P5_lay);
tests::set_values(rois_mem, rois_data);
tests::set_values(P2_mem, P2_data);
tests::set_values(P3_mem, P3_data);
tests::set_values(P4_mem, P4_data);
tests::set_values(P5_mem, P5_data);
topology topo;
topo.add(data("P2", P2_mem));
topo.add(data("P3", P3_mem));
topo.add(data("P4", P4_mem));
topo.add(data("P5", P5_mem));
topo.add(input_layout("rois", rois_lay));
topo.add(pyramid_roi_align("pyramid",
input_info("rois"),
input_info("P2"),
input_info("P3"),
input_info("P4"),
input_info("P5"),
output_size,
sampling_points,
{ P2_scale, P3_scale, P4_scale, P5_scale },
starting_level));
cldnn::network net(engine, topo);
net.set_input_data("rois", rois_mem);
std::vector<float> expected_out = {
// RoI 0,0 - 1,1 from P4
14.f, 18.f, 14.f, 18.f,
// RoI 0,0 - 0.5,0.5 from P3
11.25f, 14.25f, 11.25f, 14.25f,
// RoI 0.5,0.5 - 0.75,0.75 from P2
12.15625f, 13.03125f, 7.40625f, 8.28125f,
};
auto result = net.execute();
auto out_mem = result.at("pyramid").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
ASSERT_EQ(expected_out.size(), out_ptr.size());
for (size_t i = 0; i < expected_out.size(); ++i) {
ASSERT_EQ(expected_out[i], static_cast<float>(out_ptr[i])) << "at i = " << i;
}
this->execute(false);
}
TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels_cached) {
this->execute(true);
}
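The pyramid_roi_align rewrite above is the commit's recurring refactor in its purest form: the entire test body moves into an `execute(bool is_caching_test)` member so the plain and `_cached` typed tests share it. Reduced to a skeleton (identifiers here are illustrative, not from the diff):
template <typename T>
struct example_typed_test : public testing::Test {
    void execute(bool is_caching_test) {
        // Build the topology, then run it through the caching-aware helper:
        //   cldnn::network::ptr net = get_network(engine, topo, config,
        //                                         get_test_stream_ptr(), is_caching_test);
        // Set inputs, execute, and compare outputs against expected values.
    }
};
using example_types = testing::Types<float, half_t>;
TYPED_TEST_SUITE(example_typed_test, example_types);
TYPED_TEST(example_typed_test, smoke)        { this->execute(false); }
TYPED_TEST(example_typed_test, smoke_cached) { this->execute(true); }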

View File

@ -794,7 +794,7 @@ struct quantize_random_test : testing::TestWithParam<quantize_random_test_params
}
}
void execute_compare(const quantize_random_test_params& params, bool check_result) {
void execute_compare(const quantize_random_test_params& params, bool check_result, bool is_caching_test) {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
@ -840,10 +840,11 @@ struct quantize_random_test : testing::TestWithParam<quantize_random_test_params
ExecutionConfig config;
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"quantize"}));
network net(engine, topo, config);
net.set_input_data("input", input);
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto result = net.execute();
net->set_input_data("input", input);
auto result = net->execute();
auto output = result.at("quantize").get_memory();
auto input_opt = engine.allocate_memory(in_layout);
@ -909,7 +910,7 @@ struct quantize_random_test_param_generator : std::vector<quantize_random_test_p
TEST_P(quantize_random_test, random) {
auto param = GetParam();
execute_compare(param, true);
execute_compare(param, true, false);
}
INSTANTIATE_TEST_SUITE_P(quantize_smoke,
@ -919,3 +920,23 @@ INSTANTIATE_TEST_SUITE_P(quantize_smoke,
.simple_params(data_types::f32, data_types::u8, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, 5)
.simple_params(data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, 5)
));
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(quantize_random_test, random_cached) {
auto param = GetParam();
execute_compare(param, true, true);
}
#else
using quantize_random_test_cached = quantize_random_test;
TEST_P(quantize_random_test_cached, random) {
auto param = GetParam();
execute_compare(param, true, true);
}
INSTANTIATE_TEST_SUITE_P(quantize_smoke,
quantize_random_test_cached,
testing::Values(
quantize_random_test_params{ data_types::f32, data_types::u8, {1, 16, 10, 10}, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, 5}
));
#endif

View File

@ -30,7 +30,7 @@ struct RandomUniformParams {
template<typename T>
struct random_uniform_gpu_test : public ::testing::TestWithParam<RandomUniformParams<T> > {
public:
void test() {
void test(bool is_caching_test) {
auto data_type = type_to_data_type<T>::value;
RandomUniformParams<T> params = testing::TestWithParam<RandomUniformParams<T> >::GetParam();
@ -56,13 +56,13 @@ public:
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network net{engine, topology, config};
cldnn::network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
net.set_input_data("shape", shape);
net.set_input_data("min_val", min_val);
net.set_input_data("max_val", max_val);
net->set_input_data("shape", shape);
net->set_input_data("min_val", min_val);
net->set_input_data("max_val", max_val);
auto result = net.execute();
auto result = net->execute();
auto out_mem = result.at("random_uniform").get_memory();
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
@ -105,20 +105,20 @@ using random_uniform_gpu_test_f32 = random_uniform_gpu_test<float>;
using random_uniform_gpu_test_f16 = random_uniform_gpu_test<half_t>;
TEST_P(random_uniform_gpu_test_i32, random_int32) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(random_uniform_gpu_test_i64, random_int64) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(random_uniform_gpu_test_f32, random_f32) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(random_uniform_gpu_test_f16, random_f16) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int32,
@ -186,3 +186,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_f16,
}
),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(random_uniform_gpu_test_i32, random_int32_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(random_uniform_gpu_test_i64, random_int64_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(random_uniform_gpu_test_f32, random_f32_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
#endif
TEST_P(random_uniform_gpu_test_f16, random_f16_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}

View File

@ -482,7 +482,7 @@ protected:
}
public:
void execute() {
void execute(bool is_caching_test) {
int input_dim = static_cast<int>(input_format.dimension());
cldnn::format layout_format = input_format;
@ -530,12 +530,11 @@ public:
config.set_property(ov::intel_gpu::optimize_data(true));
ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name};
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}}));
network network(engine, topology, config);
network.set_input_data("input", input_mem);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network->execute();
network.execute();
auto out_mem = network.get_output("reduce").get_memory();
auto out_mem = network->get_output("reduce").get_memory();
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
auto out_lay = out_mem->get_layout();
@ -573,13 +572,13 @@ public:
};
class general_reduce_gpu_i8_i8 : public ReduceTestBase<data_types::i8, data_types::i8> {};
TEST_P(general_reduce_gpu_i8_i8, base) { execute(); }
TEST_P(general_reduce_gpu_i8_i8, base) { execute(false); }
class general_reduce_gpu_i8_f32 : public ReduceTestBase<data_types::i8, data_types::f32> {};
TEST_P(general_reduce_gpu_i8_f32, base) { execute(); }
TEST_P(general_reduce_gpu_i8_f32, base) { execute(false); }
class general_reduce_gpu_f32_f32 : public ReduceTestBase<data_types::f32, data_types::f32> {};
TEST_P(general_reduce_gpu_f32_f32, base) { execute(); }
TEST_P(general_reduce_gpu_f32_f32, base) { execute(false); }
INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_i8_i8,
general_reduce_gpu_i8_i8,
@ -770,7 +769,8 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_reduce_gpu_ref_f32_f32,
),
general_reduce_gpu::PrintToStringParamName);
TEST(reduce_gpu, common_bfyx) {
template <typename T>
void test_common_bfyx(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}});
@ -780,26 +780,30 @@ TEST(reduce_gpu, common_bfyx) {
topology.add(input_layout("input", input->get_layout()));
topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network->set_input_data("input", input);
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reduce");
auto output = outputs.at("reduce").get_memory();
std::vector<float> ref_data = {1.0f};
std::vector<T> ref_data = {1.0f};
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < ref_data.size(); ++i) {
ASSERT_TRUE(are_equal(ref_data[i], output_ptr[i]));
}
}
TEST(reduce_gpu, common_bfyx) {
test_common_bfyx<float>(false);
}
TEST(reduce_gpu, common_bfyx_keepdims) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 4, 1}});
@ -1823,7 +1827,7 @@ protected:
}
public:
void execute() {
void execute(bool is_caching_test) {
int input_dim = static_cast<int>(input_format.dimension());
cldnn::format layout_format = input_format;
@ -1891,12 +1895,12 @@ public:
config.set_property(ov::intel_gpu::optimize_data(true));
ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name};
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}}));
network network(engine, topology, config);
network.set_input_data("input", input_mem);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input_mem);
network.execute();
network->execute();
auto out_mem = network.get_output("reduce").get_memory();
auto out_mem = network->get_output("reduce").get_memory();
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
auto out_lay = out_mem->get_layout();
@ -1939,10 +1943,10 @@ public:
class general_reduce_gpu_xy_f32 : public ReduceXYWithBigTensorTestBase<data_types::f32, data_types::f32> {};
TEST_P(general_reduce_gpu_xy_f32, base) { execute(); }
TEST_P(general_reduce_gpu_xy_f32, base) { execute(false); }
class general_reduce_gpu_xy_i8 : public ReduceXYWithBigTensorTestBase<data_types::i8, data_types::i8> {};
TEST_P(general_reduce_gpu_xy_i8, base) { execute(); }
TEST_P(general_reduce_gpu_xy_i8, base) { execute(false); }
INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_xy_f32,
general_reduce_gpu_xy_f32,
@ -2111,7 +2115,7 @@ INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_i8_f32,
TestParamType_general_reduce_gpu(17, 3, 1, 1, 14, 11, format::b_fs_yx_fsv16, reduce_mode::mean, {1}, "reduce_gpu_b_fs_yx_fsv16", true, data_types::i8, false, data_types::f32)
), general_reduce_gpu::PrintToStringParamName);
INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_f16_f16,
onednn_reduce_gpu_f16_f16,
::testing::Values(
TestParamType_general_reduce_gpu(3, 3, 1, 1, 3, 2, format::b_fs_yx_fsv16, reduce_mode::sum, {3, 2, 1, 0}, "reduce_gpu_b_fs_yx_fsv16", false, data_types::f16, false, data_types::f16),
@ -2134,3 +2138,19 @@ INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_i8_f32,
TestParamType_general_reduce_gpu(17, 3, 1, 1, 14, 11, format::b_fs_yx_fsv16, reduce_mode::mean, {1}, "reduce_gpu_b_fs_yx_fsv16", true, data_types::f16, false, data_types::f16)
), general_reduce_gpu::PrintToStringParamName);
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(general_reduce_gpu_i8_i8, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_i8_f32, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_f32_f32, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_xy_f32, base_cached) { execute(true); }
TEST_P(general_reduce_gpu_xy_i8, base_cached) { execute(true); }
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST(reduce_gpu, common_bfyx_cached) {
test_common_bfyx<float>(true);
}

View File

@ -165,7 +165,7 @@ struct region_yolo_test_params {
};
template <typename T>
void runRegionTest(region_yolo_test_params& params) {
void runRegionTest(region_yolo_test_params& params, bool is_caching_test = false) {
auto& engine = get_test_engine();
const tensor kInputTensor(params.tensor[0], params.tensor[1], params.tensor[2], params.tensor[3]);
auto inputData = generate_random_1d<T>(params.tensor[0] * params.tensor[1] * params.tensor[2] * params.tensor[3], -1, 1);
@ -180,10 +180,11 @@ void runRegionTest(region_yolo_test_params& params) {
params.regionNum, static_cast<uint32_t>(params.mask.size()), params.softMax));
topology.add(reorder("reorder_post", input_info("region_yolo"), format::bfyx, params.dataType));
network network(engine, topology);
network.set_input_data("InputData", inputPrim);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("InputData", inputPrim);
auto outputs = network->execute();
auto output = outputs.at("reorder_post").get_memory();
cldnn::mem_lock<T> outputData(output, get_test_stream());
@ -239,3 +240,44 @@ TEST(region_yolo_gpu_fp16, byxf_softmax) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, true};
runRegionTest<FLOAT16>(params);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(region_yolo_gpu_fp32, bfyx_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::bfyx, false};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp32, bfyx_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::bfyx, true};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp32, byxf_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::byxf, false};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp32, byxf_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::byxf, true};
runRegionTest<float>(params, true);
}
TEST(region_yolo_gpu_fp16, bfyx_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::bfyx, false};
runRegionTest<FLOAT16>(params, true);
}
TEST(region_yolo_gpu_fp16, bfyx_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::bfyx, true};
runRegionTest<FLOAT16>(params, true);
}
TEST(region_yolo_gpu_fp16, byxf_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, false};
runRegionTest<FLOAT16>(params, true);
}
#endif // RUN_ALL_MODEL_CACHING_TESTS
TEST(region_yolo_gpu_fp16, byxf_softmax_cached) {
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, true};
runRegionTest<FLOAT16>(params, true);
}

View File

@ -14,7 +14,8 @@ using namespace cldnn;
using namespace ::tests;
using namespace testing;
TEST(removing_output_node, multiple_outputs) {
template <typename T>
void test_multiple_outputs(bool is_caching_test) {
// Tests split with crop implementation
// _ strided_slice(bfyx)
// |
@ -58,19 +59,19 @@ TEST(removing_output_node, multiple_outputs) {
topology.add(data("input4", strides));
topology.add(strided_slice("strided_slice", input_info("shuffle_channels"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {6, 1, 1, 1}));
std::vector<float> input_vec = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
std::vector<float> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f };
std::vector<T> input_vec = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
std::vector<T> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f };
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "shuffle_channels", "reshape", "strided_slice" }));
network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("reshape").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
ASSERT_TRUE(output->get_layout().get_tensor() == after_reshape);
@ -80,7 +81,7 @@ TEST(removing_output_node, multiple_outputs) {
// check that the output node keeps the same name after the original output node is removed due to the StridedSlice optimization
ASSERT_TRUE(outputs.find("strided_slice") != outputs.end());
auto output2 = outputs.at("strided_slice").get_memory();
cldnn::mem_lock<float> output_ptr2(output, get_test_stream());
cldnn::mem_lock<T> output_ptr2(output, get_test_stream());
ASSERT_TRUE(output2->get_layout().get_tensor() == after_strided_slice);
@ -88,7 +89,12 @@ TEST(removing_output_node, multiple_outputs) {
ASSERT_EQ(output_ptr2[i], out_vec[i]);
}
TEST(removing_output_node, output_node_optimization) {
TEST(removing_output_node, multiple_outputs) {
test_multiple_outputs<float>(false);
}
template <typename T>
void test_output_node_optimization(bool is_caching_test) {
// Filter : 2x3
// Stride : 2x1
// Input : 4x5
@ -115,7 +121,7 @@ TEST(removing_output_node, output_node_optimization) {
set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
VVF<float> output_vec = {
VVF<T> output_vec = {
{ 20.0f, 27.0f, 38.0f },
{ 17.0f, 19.0f, 19.0f } };
@ -125,17 +131,17 @@ TEST(removing_output_node, output_node_optimization) {
topology.add(convolution("conv", input_info("input"), { "weights" }, { 2, 1 }));
topology.add(activation("relu", input_info("conv"), activation_func::relu));
network network(engine, topology);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
// check that the output node keeps the same name after the original output node is removed due to the ReLU optimization
auto outputs = network.execute();
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "relu");
auto output_memory = outputs.at("relu").get_memory();
auto output_layout = output_memory->get_layout();
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
cldnn::mem_lock<T> output_ptr(output_memory, get_test_stream());
int y_size = output_layout.spatial(1);
int x_size = output_layout.spatial(0);
@ -152,3 +158,16 @@ TEST(removing_output_node, output_node_optimization) {
}
}
}
TEST(removing_output_node, output_node_optimization) {
test_output_node_optimization<float>(false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(removing_output_node, multiple_outputs_cached) {
test_multiple_outputs<float>(true);
}
#endif
TEST(removing_output_node, output_node_optimization_cached) {
test_output_node_optimization<float>(true);
}

View File

@ -41,7 +41,8 @@ static void compare_result(std::map<cldnn::primitive_id, cldnn::network_output>
static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
const data_types input_data_type, const data_types output_data_type,
cldnn::format input_format, cldnn::format output_format,
int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in = 0, int32_t w_in = 0) {
int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in, int32_t w_in,
bool is_caching_test) {
auto& engine = get_test_engine();
tensor ts;
@ -87,10 +88,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
ov::intel_gpu::ImplementationDesc reorder_ref = { output_format, "reorder_data" };
config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_ref} }));
network network_ref(engine, topology, config_ref);
network_ref.set_input_data("input", input);
cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, get_test_stream_ptr(), is_caching_test);
auto outputs_ref = network_ref.execute();
network_ref->set_input_data("input", input);
auto outputs_ref = network_ref->execute();
cldnn::event::ptr e1 = outputs_ref.at("reorder").get_event();
e1->wait();
@ -99,10 +101,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
ov::intel_gpu::ImplementationDesc reorder_optimized = { output_format, kernel_name };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_optimized} }));
network network(engine, topology, config);
network.set_input_data("input", input);
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("input", input);
auto outputs = network->execute();
cldnn::event::ptr e2 = outputs.at("reorder").get_event();
e2->wait();
@ -123,124 +126,124 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_f32) {
// b_fs_yx_fsv32 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3, 0, 0, false);
// b_fs_zyx_fsv32 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4, 0, false);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_different_datatype) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, false);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_f32) {
// u-net
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388, 0, 0, false);
// b_fs_yx_fsv16 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3, 0, 0, false);
// b_fs_zyx_fsv16 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17, 0, false);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_different_datatype) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_f32) {
// bfyx_to_b_fs_yx_fsv4
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4, 0, 0, false);
// bfyx_to_b_fs_yx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
// bfyx to b_fs_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2, 0, 0, false);
// bfyx to fs_b_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2, 0, 0, false);
// bfzyx to b_fs_zyx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2, 0, false);
// bfzyx to b_fs_zyx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32) {
// bfyx to double blocked format (bs_fs_yx_bsv16_fsv16)
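// The trailing comments in this test mark which axes carry a remainder over the 16-wide block (b, f, x); "no" means every dimension divides the block size evenly.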
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4); // no
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4); // b
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4); // f
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4); // x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4); // b-f-x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 0, 0, false);
// bfzyx to double blocked format (bs_fs_zyx_bsv16_fsv16)
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16); // no
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2); // b
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3); // f
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4); // x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2); // b-f-x
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv16_fsv32) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv16) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv32) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_different_datatype) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
}
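For orientation, the long argument lists above decode as follows. This prototype is reconstructed from the call sites; the parameter names are assumptions, not the declaration actually used by these tests:
// Reconstructed signature (illustrative only).
static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
                                          data_types input_data_type,
                                          data_types output_data_type,
                                          format input_format, format output_format,
                                          int32_t b, int32_t f, int32_t x, int32_t y,
                                          int32_t z = 0, int32_t w = 0,   // used by 5-D/6-D formats
                                          bool is_caching_test = false);  // true: run via save/load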
TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) {
@ -2575,12 +2578,14 @@ public:
static const int max_random = 200;
std::vector<primitive_id> executed_prims;
void execute(T& p) {
void execute(T& p, bool is_caching_test) {
auto input_prim = this->get_mem(get_input_layout(p));
network network_test(this->engine, this->topology_test, this->config);
network_test.set_input_data("input", input_prim);
executed_prims = network_test.get_executed_primitive_ids();
cldnn::network::ptr network_test = get_network(this->engine, this->topology_test, this->config, get_test_stream_ptr(), is_caching_test);
network_test->set_input_data("input", input_prim);
executed_prims = network_test->get_executed_primitive_ids();
}
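The new is_caching_test flag routes every test through a shared get_network helper instead of constructing cldnn::network directly. A minimal sketch of what such a helper does, assuming the BinaryOutputBuffer/BinaryInputBuffer stream wrappers introduced by this commit (the exact network save/load signatures are assumptions):
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                cldnn::stream::ptr stream,
                                bool is_caching_test) {
    if (!is_caching_test)
        return std::make_shared<cldnn::network>(engine, topology, config);
    // Caching path: compile once, serialize to an in-memory blob standing in
    // for the on-disk model cache, then hand back the deserialized copy.
    std::stringstream blob;
    {
        cldnn::network source(engine, topology, config);
        BinaryOutputBuffer ob(blob);
        source.save(ob);
    }
    BinaryInputBuffer ib(blob, engine);
    return std::make_shared<cldnn::network>(ib, config, stream, engine);
}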
bool check_optimized_out(T& p, primitive_id target_id) {
@ -2659,7 +2664,7 @@ TEST_P(testing_removal_reorder, removal_reorder_1d_along_f) {
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
@ -2682,7 +2687,7 @@ TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input) {
reorder("reorder_output", input_info("resample"), p.default_format, data_types::f32)
);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
}
@ -2713,7 +2718,7 @@ TEST_P(testing_removal_reorder, removal_no_padded_reorder) {
setup_with_build_ops(config);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), true);
}
@ -2743,7 +2748,7 @@ TEST_P(testing_removal_reorder, removal_padded_reorder) {
setup_with_build_ops(config);
execute(p);
execute(p, false);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
}
@ -2810,3 +2815,166 @@ TEST(reorder_onednn_gpu, basic_convert_int8) {
}
}
#endif // ENABLE_ONEDNN_FOR_GPU
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_f32_cached) {
// b_fs_yx_fsv32 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3, 0, 0, true);
// b_fs_yx_fsv32 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4, 0, true);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_different_datatype_cached) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, true);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_f32_cached) {
// U-Net-like spatial size (1x64x388x388)
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388, 0, 0, true);
// b_fs_yx_fsv16 -> bfyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3, 0, 0, true);
// b_fs_zyx_fsv16 -> bfzyx
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17, 0, true);
// incremental dims
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_different_datatype_cached) {
// f32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
// i32 -> other types
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_f32_cached) {
// bfyx_to_b_fs_yx_fsv4
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4, 0, 0, true);
// bfyx_to_b_fs_yx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
// bfyx to b_fs_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2, 0, 0, true);
// bfyx to fs_b_yx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2, 0, 0, true);
// bfzyx to b_fs_zyx_fsv16
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2, 0, true);
// bfzyx to b_fs_zyx_fsv32
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_cached) {
// bfyx to double blocked format (bs_fs_yx_bsv16_fsv16)
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 0, 0, true);
// bfzyx to double blocked format (bs_fs_zyx_bsv16_fsv16)
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv16_fsv32_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv16_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv32_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, true);
}
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_different_datatype_cached) {
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, true);
}
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f_cached) {
auto p = GetParam();
create_topologies(input_layout("input", get_input_layout(p)),
reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
data("weights", get_mem(get_weights_layout(p))),
data("bias1", get_mem(get_bias_layout(p))),
reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
convolution("conv_prim", input_info("reorder_input"), {"weights"}, std::vector<primitive_id>{}, 1, p.stride, p.pad),
reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
);
execute(p, true);
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
}
#endif
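RUN_ALL_MODEL_CACHING_TESTS gates the bulk of the cached variants so that default test runs stay fast; note that one cached test per suite (such as the one directly below, and the *_cached tests after each later #endif) is left outside the guard, so the serialization path always gets at least smoke coverage. Opting in is an ordinary compile definition, e.g. (hypothetical wiring; the real build setup may differ):
// Enable the full cached test set for this translation unit,
// or pass -DRUN_ALL_MODEL_CACHING_TESTS via the build system.
#define RUN_ALL_MODEL_CACHING_TESTS 1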
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input_cached) {
auto p = GetParam();
layout reorder_layout(data_types::u8, format::b_fs_yx_fsv32, p.in_shape, padding({0, }, 0));
create_topologies(input_layout("input", get_input_layout(p)),
data("weights", get_mem(get_weights_layout(p))),
data("bias", get_mem(get_bias_layout(p))),
data("weights_sec", get_mem(get_weights_layout(p))),
reorder("reorder_fp32", input_info("input"), format::bfyx, data_types::f32),
convolution("conv_prim", input_info("reorder_fp32"), { "weights" }, { "bias" }, 1, p.stride, p.pad, {1, 1}, p.in_shape, data_types::u8, false),
reorder("reorder_conv", input_info("conv_prim"), reorder_layout),
convolution("conv_output", input_info("reorder_conv"), { "weights_sec" }, 1, p.stride, p.pad),
reorder("reorder_bfyx", input_info("conv_output"), format::b_fs_yx_fsv32, data_types::f32),
resample("resample", input_info("reorder_bfyx"), p.out_shape, 1),
reorder("reorder_output", input_info("resample"), p.default_format, data_types::f32)
);
execute(p, true);
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
}
View File
@ -290,21 +290,21 @@ template<typename T>
struct reorg_yolo_test
: public ::testing::TestWithParam<ReorgYoloParamsWithLayout<T> > {
public:
void test() {
void test(bool is_caching_test) {
ReorgYoloParams<T> params;
format::type target_format;
bool should_fail;
std::tie(params, target_format, should_fail) = this->GetParam();
if (should_fail) {
ASSERT_ANY_THROW(run_test(params, target_format));
ASSERT_ANY_THROW(run_test(params, target_format, is_caching_test));
} else {
ASSERT_NO_FATAL_FAILURE(run_test(params, target_format));
ASSERT_NO_FATAL_FAILURE(run_test(params, target_format, is_caching_test));
}
}
private:
void run_test(const ReorgYoloParams<T>& params, const format::type target_format) {
void run_test(const ReorgYoloParams<T>& params, const format::type target_format, bool is_caching_test) {
const auto data_type = type_to_data_type<T>::value;
const format::type plain_format = format::bfyx;
@ -320,9 +320,9 @@ private:
topology.add(reorg_yolo("reorg_yolo", input_info("input_reordered"), params.stride));
topology.add(reorder("reorg_yolo_reordered", input_info("reorg_yolo"), plain_format, data_type));
network network(engine, topology);
network.set_input_data("input", input);
const auto result = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
const auto result = network->execute();
auto out_mem = result.at("reorg_yolo_reordered").get_memory();
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
@ -339,11 +339,11 @@ using test_f32 = reorg_yolo_test<float>;
using test_f16 = reorg_yolo_test<half_t>;
TEST_P(test_f32, basic) {
test();
test(false);
}
TEST_P(test_f16, basic) {
test();
test(false);
}
@ -371,3 +371,12 @@ INSTANTIATE_TEST_SUITE_P(reorg_yolo_invalid_input,
::testing::Values(format::bfyx),
::testing::Values(true)),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(test_f32, basic_cached) {
test(true);
}
#endif
TEST_P(test_f16, basic_cached) {
test(true);
}
View File
@ -12,7 +12,8 @@
using namespace cldnn;
using namespace ::tests;
TEST(resample_gpu, basic_in2x3x2x2_nearest) {
template <typename T>
void test_basic_in2x3x2x2_nearest(bool is_caching_test) {
// Input : 2x2x3x2
// Output : 2x2x6x4
// Sample Type: Nearest
@ -46,16 +47,16 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
12.f, 9.f, -17.f,
});
cldnn::network net{ engine, topology };
cldnn::network::ptr net = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
net.set_input_data("input", input);
net->set_input_data("input", input);
auto outputs = net.execute();
auto outputs = net->execute();
auto output = outputs.at("upsampling").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
float answers[96] = {
T answers[96] = {
1.f, 1.f, 2.f, 2.f, -10.f, -10.f,
1.f, 1.f, 2.f, 2.f, -10.f, -10.f,
3.f, 3.f, 4.f, 4.f, -14.f, -14.f,
@ -86,6 +87,10 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
}
}
TEST(resample_gpu, basic_in2x3x2x2_nearest) {
test_basic_in2x3x2x2_nearest<float>(false);
}
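The change above shows the refactoring pattern applied throughout these files: each TEST body moves into a templated helper that takes is_caching_test, and thin TEST wrappers pick the path. Schematically (names illustrative):
template <typename T>
void test_case_body(bool is_caching_test) {
    // build topology, then:
    //   cldnn::network::ptr net = get_network(engine, topology, config,
    //                                         get_test_stream_ptr(), is_caching_test);
    // run and validate exactly as before
}

TEST(some_gpu_suite, some_case)        { test_case_body<float>(false); }  // direct build
TEST(some_gpu_suite, some_case_cached) { test_case_body<float>(true);  }  // via save/load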
TEST(resample_gpu, basic_in2x3x2x2_bilinear) {
// Input : 1x1x2x2
// Output : 1x1x4x4
@ -456,7 +461,7 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
}
}
void execute(const resample_random_test_params& params) {
void execute(const resample_random_test_params& params, bool is_caching_test) {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
@ -467,26 +472,27 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
topo.add(prim);
ExecutionConfig config(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample", {params.out_format, ""}} }));
cldnn::network net(engine, topo, config);
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
auto in_mem = engine.allocate_memory(in_layout);
fill_random(in_mem);
net.set_input_data("in", in_mem);
net->set_input_data("in", in_mem);
auto result = net.execute();
auto result = net->execute();
auto output = result.at("resample").get_memory();
std::string kernel = "";
for (auto& info : net.get_primitives_info()) {
if (info.original_id == "resample")
kernel = info.kernel_id;
if (!is_caching_test) {
for (auto& info : net->get_primitives_info()) {
if (info.original_id == "resample")
kernel = info.kernel_id;
}
}
}
};
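Note the is_caching_test guard above: the kernel-id lookup through get_primitives_info() is skipped on the caching path, presumably because the implementation metadata reported for a deserialized network need not match what the original compilation selected.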
TEST_P(resample_random_test, random) {
execute(GetParam());
execute(GetParam(), false);
}
struct resample_random_test_param_generator : std::vector<resample_random_test_params> {
@ -611,7 +617,7 @@ struct caffe_resample_random_test : testing::TestWithParam<caffe_resample_random
}
}
void execute_compare(const caffe_resample_random_test_params& params, bool check_result) {
void execute_compare(const caffe_resample_random_test_params& params, bool check_result, bool is_caching_test) {
auto& engine = get_test_engine();
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
@ -647,12 +653,12 @@ struct caffe_resample_random_test : testing::TestWithParam<caffe_resample_random
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt"}));
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, "resample_opt"}} }));
cldnn::network net_opt(engine, topo_opt, config_opt);
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
// Use in_mem from ref network
net_opt.set_input_data("in", in_mem);
net_opt->set_input_data("in", in_mem);
auto result_opt = net_opt.execute();
auto result_opt = net_opt->execute();
auto output_opt = result_opt.at("resample_opt").get_memory();
if (check_result == true) {
@ -695,7 +701,7 @@ struct caffe_resample_random_test_param_generator : std::vector<caffe_resample_r
TEST_P(caffe_resample_random_test, random) {
auto param = GetParam();
execute_compare(param, true);
execute_compare(param, true, false);
}
INSTANTIATE_TEST_SUITE_P(caffe_smoke_caffe_fsv16,
@ -2004,7 +2010,8 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
}
}
void execute_compare(const resample_opt_random_test_params& params, bool check_result, const std::string& kernel = "resample_opt") {
void execute_compare(const resample_opt_random_test_params& params, bool check_result,
bool is_caching_test, const std::string& kernel = "resample_opt") {
auto& engine = get_test_engine();
const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
@ -2042,13 +2049,13 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
ExecutionConfig config_opt;
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt", "res_to_bfyx"}));
network net_opt(engine, topo_opt, config_opt);
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
// Use in_mem from ref network
net_opt.set_input_data("in", in_mem);
net_opt->set_input_data("in", in_mem);
// first execution of opt
auto result_opt = net_opt.execute();
auto result_opt = net_opt->execute();
auto output_opt = result_opt.at("res_to_bfyx").get_memory();
if (!format::is_simple_data_format(params.in_format)) {
ASSERT_FALSE(format::is_simple_data_format(result_opt.at("resample_opt").get_memory()->get_layout().format));
@ -2176,7 +2183,7 @@ struct resample_opt_random_test_ext : resample_opt_random_test
TEST_P(resample_opt_random_test, random) {
auto param = GetParam();
execute_compare(param, true);
execute_compare(param, true, false);
}
TEST_P(resample_opt_random_test_ext, DISABLED_random) {
@ -2329,3 +2336,22 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_simple,
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
}
));
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(resample_random_test, random_cached) {
execute(GetParam(), true);
}
TEST_P(caffe_resample_random_test, random_cached) {
auto param = GetParam();
execute_compare(param, true, true);
}
TEST_P(resample_opt_random_test, random_cached) {
auto param = GetParam();
execute_compare(param, true, true);
}
#endif
TEST(resample_gpu, basic_in2x3x2x2_nearest_cached) {
test_basic_in2x3x2x2_nearest<float>(true);
}
View File
@ -26,7 +26,7 @@ void verify_int(const int32_t& output_value, const int32_t& value) {
template <class ElemType>
void generic_reshape_test(format fmt, tensor const& input_size, tensor const& reshape_size,
bool /* in_place */, padding const& input_padd = padding(),
padding const& output_padd = padding()) {
padding const& output_padd = padding(), bool is_caching_test = false) {
auto& engine = get_test_engine();
//allocate input memory
@ -68,9 +68,9 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re
ExecutionConfig config;
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{reshape_input, "reshape"}));
network net(engine, tpl, config);
net.set_input_data("input", input);
auto outputs = net.execute();
cldnn::network::ptr net = get_network(engine, tpl, config, get_test_stream_ptr(), is_caching_test);
net->set_input_data("input", input);
auto outputs = net->execute();
ASSERT_TRUE(outputs.size() == 2 && outputs.count("reshape") == 1 && outputs.count(reshape_input) == 1);
auto net_input = outputs.at(reshape_input).get_memory();
@ -411,7 +411,8 @@ TEST(reshape_gpu_f32, basic_5dim_in_place) {
true);
}
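Giving the new is_caching_test parameter of generic_reshape_test a default of false (see the signature change above) keeps the many existing call sites source-compatible; only the *_cached tests added at the end of this file pass true explicitly.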
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
template <typename T>
void test_multiple_users_with_reorder(bool is_caching_test) {
// Tests split with crop implementation
// _ REORDER(yxfb) --> RELU(yxfb)
// |
@ -452,29 +453,34 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) {
topology.add(activation("relu1", input_info("reorder1"), activation_func::relu));
topology.add(activation("relu2", input_info("reshape"), activation_func::relu));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out1 = {0.f, 2.f, 0.f, 4.0f};
std::vector<float> out2 = {0.f, 2.f, 0.f, 4.0f};
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out1 = {0.f, 2.f, 0.f, 4.0f};
std::vector<T> out2 = {0.f, 2.f, 0.f, 4.0f};
set_values(input, input_vec);
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("relu1").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out1.size(); i++)
ASSERT_EQ(output_ptr[i], out1[i]);
auto output_2 = outputs.at("relu2").get_memory();
cldnn::mem_lock<float> output_ptr_2(output_2, get_test_stream());
cldnn::mem_lock<T> output_ptr_2(output_2, get_test_stream());
for (size_t i = 0; i < out2.size(); i++)
ASSERT_EQ(output_ptr_2[i], out2[i]);
}
TEST(reshape_gpu_f32, calc_output_shape) {
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
test_multiple_users_with_reorder<float>(false);
}
template <typename T>
void test_calc_output_shape(bool is_caching_test) {
// INPUT(bfyx,2x2x1x1) -- RESHAPE(1, 1, 0, -1)
// Input:
@ -495,9 +501,9 @@ TEST(reshape_gpu_f32, calc_output_shape) {
set_values(input, {-1.f, 2.f, -3.f, 4.f});
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reshape");
@ -509,15 +515,20 @@ TEST(reshape_gpu_f32, calc_output_shape) {
ASSERT_TRUE(output->get_layout().get_tensor() == tensor(1, 1, 1, 4));
float answers[4] = {-1.f, 2.f, -3.f, 4.f};
T answers[4] = {-1.f, 2.f, -3.f, 4.f};
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (int i = 0; i < 4; i++) {
ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
}
}
TEST(reshape_gpu_f32, basic_bfwzyx) {
TEST(reshape_gpu_f32, calc_output_shape) {
test_calc_output_shape<float>(false);
}
template <typename T>
void test_basic_bfwzyx(bool is_caching_test) {
// input: bfwzyx, (3, 3, 2, 2, 1, 1)
// reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1)
@ -562,9 +573,9 @@ TEST(reshape_gpu_f32, basic_bfwzyx) {
set_values(input, input_data);
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reshape");
@ -582,7 +593,12 @@ TEST(reshape_gpu_f32, basic_bfwzyx) {
}
}
TEST(reshape_gpu_f32, shrink_chain_partial) {
TEST(reshape_gpu_f32, basic_bfwzyx) {
test_basic_bfwzyx<float>(false);
}
template <typename T>
void test_shrink_chain_partial(bool is_caching_test) {
auto& engine = get_test_engine();
auto batch_num = 2;
auto feature_num = 2;
@ -592,8 +608,8 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
@ -609,8 +625,53 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out = {5.f, 12.f, 15.f, 32.0f};
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_partial) {
test_shrink_chain_partial<float>(false);
}
template <typename T>
void test_shrink_chain_full(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("scale_in", scale_in));
topology.add(data("shift_in", shift_in));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
set_values(input, input_vec);
ExecutionConfig config;
@ -620,85 +681,54 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
auto outputs = network.execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_full) {
test_shrink_chain_full<float>(false);
}
template <typename T>
void test_shrink_chain_out(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(data("scale_in", scale_in));
topology.add(data("shift_in", shift_in));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out = {5.f, 12.f, 15.f, 32.0f};
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<T> out = {0.f, 2.f, 0.f, 4.0f};
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
auto outputs = network->execute();
auto output = outputs.at("out_reorder").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
auto output = outputs.at("reshape1").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_out) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
set_values(scale_in, scale_vals);
set_values(shift_in, scale_shifts);
topology topology;
topology.add(input_layout("input", input->get_layout()));
topology.add(activation("relu", input_info("input"), activation_func::relu));
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
std::vector<float> out = {0.f, 2.f, 0.f, 4.0f};
set_values(input, input_vec);
ExecutionConfig config;
config.set_property(ov::intel_gpu::optimize_data(true));
network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
auto output = outputs.at("reshape1").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
for (size_t i = 0; i < out.size(); i++)
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
test_shrink_chain_out<float>(false);
}
TEST(reshape_gpu_f32, basic_runtime_static_shape) {
@ -910,3 +940,369 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) {
ASSERT_TRUE(are_equal(input_data[i], output_ptr[i]));
}
}
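The cached block below mirrors the earlier reshape cases one-for-one, re-running each through the serialization path by passing true as the trailing argument of generic_reshape_test or of the templated helpers introduced above.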
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(reshape_gpu_f32, basic_2dim_in_place_cached) {
generic_reshape_test<float>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 4, 1),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_2dim_in_place_cached) {
generic_reshape_test<FLOAT16>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i8, basic_2dim_in_place_cached) {
generic_reshape_test<int8_t>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i32, basic_2dim_in_place_cached) {
generic_reshape_test<int32_t>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i64, basic_2dim_in_place_cached) {
generic_reshape_test<int64_t>(
format::bfyx,
tensor(1, 1, 2, 2),
tensor(1, 1, 1, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_4dim_in_place_cached) {
generic_reshape_test<float>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(27, 2, 3, 4),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_4dim_in_place_cached) {
generic_reshape_test<FLOAT16>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(3, 4, 27, 2),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i32, basic_4dim_in_place_cached) {
generic_reshape_test<int32_t>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(3, 4, 27, 2),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_i64, basic_4dim_in_place_cached) {
generic_reshape_test<int64_t>(
format::yxfb,
tensor(9, 9, 2, 4),
tensor(3, 4, 27, 2),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_2dim_output_padd_cached) {
generic_reshape_test<float>(
format::byxf,
tensor(1, 1, 4, 2),
tensor(1, 1, 8, 1),
false,
padding(),
padding(std::vector<int>{0, 0, 1, 1}),
true);
}
TEST(reshape_gpu_f16, basic_2dim_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_i8, basic_2dim_output_padd_cached) {
generic_reshape_test<int8_t>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_i32, basic_2dim_output_padd_cached) {
generic_reshape_test<int32_t>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_i64, basic_2dim_output_padd_cached) {
generic_reshape_test<int64_t>(
format::byxf,
tensor(1, 1, 3, 4),
tensor(1, 1, 2, 6),
false,
padding(),
padding(std::vector<int>{0, 0, 2, 2}),
true);
}
TEST(reshape_gpu_f32, basic_2dim_input_padd_cached) {
generic_reshape_test<float>(
format::fyxb,
tensor(1, 1, 2, 5),
tensor(1, 1, 5, 2),
false,
padding({0, 0, 3, 2}, {0, 0, 1, 4}),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_2dim_input_padd_cached) {
generic_reshape_test<FLOAT16>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_i8, basic_2dim_input_padd_cached) {
generic_reshape_test<int8_t>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_i32, basic_2dim_input_padd_cached) {
generic_reshape_test<int32_t>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_i64, basic_2dim_input_padd_cached) {
generic_reshape_test<int64_t>(
format::fyxb,
tensor(1, 1, 3, 3),
tensor(1, 1, 1, 9),
false,
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_2dim_input_output_padd_cached) {
generic_reshape_test<float>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_f16, basic_2dim_input_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::byxf,
tensor(1, 1, 6, 6),
tensor(1, 1, 3, 12),
false,
padding({0, 0, 1, 1}, {0, 0, 0, 0}),
padding({0, 0, 2, 1}, {0, 0, 1, 2}),
true);
}
TEST(reshape_gpu_i8, basic_2dim_input_output_padd_cached) {
generic_reshape_test<int8_t>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_i32, basic_2dim_input_output_padd_cached) {
generic_reshape_test<int32_t>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_i64, basic_2dim_input_output_padd_cached) {
generic_reshape_test<int64_t>(
format::byxf,
tensor(1, 1, 5, 7),
tensor(1, 1, 7, 5),
false,
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
true);
}
TEST(reshape_gpu_f32, basic_4dim_output_padd_cached) {
generic_reshape_test<float>(
format::bfyx,
tensor(2, 5, 7, 3),
tensor(1, 14, 15, 1),
false,
padding(),
padding({1, 0, 0, 1}, {0, 2, 3, 0}),
true);
}
TEST(reshape_gpu_f16, basic_4dim_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::bfyx,
tensor(5, 4, 2, 2),
tensor(40, 2, 1, 1),
false,
padding(),
padding({0, 2, 0, 1}, {0, 2, 3, 0}),
true);
}
TEST(reshape_gpu_f32, basic_4dim_input_padd_cached) {
generic_reshape_test<float>(
format::yxfb,
tensor(8, 128, 3, 3),
tensor(16, 8, 8, 9),
false,
padding({0, 1, 3, 3}, {0, 1, 1, 1}),
padding(),
true);
}
TEST(reshape_gpu_f16, basic_4dim_input_padd_cached) {
generic_reshape_test<FLOAT16>(
format::yxfb,
tensor(2, 32, 8, 8),
tensor(8, 128, 1, 4),
false,
padding({2, 2, 1, 0}, {1, 2, 2, 0}),
padding(),
true);
}
TEST(reshape_gpu_f32, basic_4dim_input_output_padd_cached) {
generic_reshape_test<float>(
format::fyxb,
tensor(8, 1024, 25, 25),
tensor(8, 64, 100, 100),
false,
padding({2, 0, 2, 1}, {0, 1, 4, 0}),
padding({1, 2, 3, 4}, {0, 4, 1, 1}),
true);
}
TEST(reshape_gpu_f16, basic_4dim_input_output_padd_cached) {
generic_reshape_test<FLOAT16>(
format::byxf,
tensor(32, 3, 227, 227),
tensor(8, 12, 227, 227),
false,
padding({0, 1, 4, 4}, {0, 1, 1, 1}),
padding({0, 29, 29, 0}, {0, 0, 0, 0}),
true);
}
TEST(reshape_gpu_f32, basic_5dim_in_place_cached) {
generic_reshape_test<float>(
format::bfzyx,
tensor(9, 9, 2, 4, 2),
tensor(27, 2, 1, 4, 6),
true,
padding(),
padding(),
true);
}
TEST(reshape_gpu_f32, multiple_users_with_reorder_cached) {
test_multiple_users_with_reorder<float>(true);
}
TEST(reshape_gpu_f32, calc_output_shape_cached) {
test_calc_output_shape<float>(true);
}
TEST(reshape_gpu_f32, basic_bfwzyx_cached) {
test_basic_bfwzyx<float>(true);
}
TEST(reshape_gpu_f32, shrink_chain_partial_cached) {
test_shrink_chain_partial<float>(true);
}
TEST(reshape_gpu_f32, shrink_chain_full_cached) {
test_shrink_chain_full<float>(true);
}
#endif
TEST(reshape_gpu_f32, shrink_chain_out_cached) {
test_shrink_chain_out<float>(true);
}
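
All of the `_cached` reshape variants above funnel into the same `generic_reshape_test` helper, whose definition is outside this excerpt. A hypothetical reconstruction of its signature, inferred purely from the call sites, is sketched below; the parameter names are guesses (the fourth argument is presumably an in-place flag, since the `*_in_place_*` tests pass `true` and the padded tests pass `false`), and the trailing `true` added by this commit is taken to be the new `is_caching_test` switch. Note also the guard pattern used throughout the commit: most cached variants sit behind `#ifdef RUN_ALL_MODEL_CACHING_TESTS`, while one test per file (here `shrink_chain_out_cached`) stays outside the guard so the caching path is always exercised at least once.

// Hypothetical declaration reconstructed from the call sites above;
// the real helper lives in this test file and may name things differently.
template <typename ElemType>
void generic_reshape_test(cldnn::format fmt,            // input layout: bfyx, yxfb, byxf, fyxb, bfzyx, ...
                          cldnn::tensor input_size,     // shape of the input buffer
                          cldnn::tensor output_size,    // reshape target (same element count)
                          bool in_place,                // assumed meaning: expect an in-place reshape
                          cldnn::padding input_padd = cldnn::padding(),
                          cldnn::padding output_padd = cldnn::padding(),
                          bool is_caching_test = false); // assumed: round-trip the network through the model cache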

View File

@ -43,7 +43,7 @@ struct ReverseParams {
template <typename T, reverse_mode mode>
struct reverse_gpu_test : public ::testing::TestWithParam<ReverseParams<T, mode>> {
public:
void test() {
void test(bool is_caching_test = false) {
auto data_type = type_to_data_type<T>::value;
ReverseParams<T, mode> params = testing::TestWithParam<ReverseParams<T, mode>>::GetParam();
auto& engine = get_test_engine();
@ -76,10 +76,10 @@ public:
tp.add(reverse(reverse_id, input_info(reverse_input_id), input_info(axes_id), mode));
}
network network(engine, tp);
network.set_input_data(reverse_input_id, reverse_input);
network.set_input_data(axes_id, reverse_axes);
auto result = network.execute();
cldnn::network::ptr network = get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data(reverse_input_id, reverse_input);
network->set_input_data(axes_id, reverse_axes);
auto result = network->execute();
auto out_mem = result.at(ouput_op_name).get_memory();
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
@ -422,3 +422,52 @@ INSTANTIATE_TEST_SUITE_P(smoke_reverse_f16_index,
reverse_gpu_test_f16_index,
::testing::ValuesIn(generateIndexParams<half_t>()),
PrintToStringParamName());
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST_P(reverse_gpu_test_int32_mask, reverse_i32_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int32_index, reverse_i32_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int64_mask, reverse_i64_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int64_index, reverse_i64_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_float_mask, reverse_float_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_float_index, reverse_float_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int8_mask, reverse_int8_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_int8_index, reverse_int8_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_uint8_mask, reverse_uint8_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_uint8_index, reverse_uint8_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
TEST_P(reverse_gpu_test_f16_mask, reverse_f16_mask_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
#endif
TEST_P(reverse_gpu_test_f16_index, reverse_f16_index_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
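
The conversion pattern in this hunk repeats across the whole commit: the direct `network network(engine, tp);` construction becomes a call to the shared `get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test)` helper, and the remaining lines only change from `.` to `->`. The helper's definition is not part of this excerpt; the sketch below is a minimal guess at its behavior, assuming the usual cache-test pattern of serializing the compiled network into an in-memory blob and deserializing it back, so that the unchanged assertions then validate the reloaded network. Everything here except the call signature is an assumption, including the buffer types, the constructor forms, and the save/load entry points.

#include <memory>
#include <sstream>

// Minimal sketch under the assumptions stated above -- not the real implementation.
cldnn::network::ptr get_network(cldnn::engine& engine,
                                cldnn::topology& topology,
                                const ExecutionConfig& config,
                                cldnn::stream::ptr stream,
                                bool is_caching_test) {
    if (!is_caching_test) {
        // Mirrors the old direct-construction path.
        return std::make_shared<cldnn::network>(engine, topology, config);
    }
    // Caching path: build once, serialize to memory, rebuild from the blob.
    std::stringstream blob;                        // stands in for the on-disk model cache
    {
        cldnn::network compiled(engine, topology, config);
        cldnn::BinaryOutputBuffer ob(blob);        // hypothetical buffer type
        compiled.save(ob);                         // hypothetical serialize entry point
    }
    cldnn::BinaryInputBuffer ib(blob, engine);     // hypothetical buffer type
    auto reloaded = std::make_shared<cldnn::network>(engine, config, stream);
    reloaded->load(ib);                            // hypothetical deserialize entry point
    return reloaded;
}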

View File

@ -12,7 +12,8 @@
using namespace cldnn;
using namespace ::tests;
TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
template <typename T>
void test_fp32_d2_2_ba1_sa0(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
@ -35,17 +36,17 @@ TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis)
);
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network.set_input_data("seq_lengths", seq_lengths);
network->set_input_data("input", input);
network->set_input_data("seq_lengths", seq_lengths);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("reverse_sequence").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
std::vector<float> expected_results = {
std::vector<T> expected_results = {
0.0f, 3.0f, 2.0f, 1.0f
};
@ -54,7 +55,12 @@ TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
}
}
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
test_fp32_d2_2_ba1_sa0<float>(false);
}
template <typename T>
void test_fp32_d3_3_3_ba0_sa1(bool is_caching_test) {
auto& engine = get_test_engine();
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
@ -79,17 +85,17 @@ TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis)
);
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network.set_input_data("input", input);
network.set_input_data("seq_lengths", seq_lengths);
network->set_input_data("input", input);
network->set_input_data("seq_lengths", seq_lengths);
auto outputs = network.execute();
auto outputs = network->execute();
auto output = outputs.at("reverse_sequence").get_memory();
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
std::vector<float> expected_results = {
std::vector<T> expected_results = {
3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
@ -100,6 +106,10 @@ TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
}
}
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
test_fp32_d3_3_3_ba0_sa1<float>(false);
}
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
auto& engine = get_test_engine();
@ -603,3 +613,12 @@ TEST(reverse_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
ASSERT_EQ(expected_results[i], half_to_float(output_ptr[i]));
}
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(reverse_sequence_gpu_test, fp32_d2_2_ba1_sa0_cached) {
test_fp32_d2_2_ba1_sa0<float>(true);
}
#endif
TEST(reverse_sequence_gpu_test, fp32_d3_3_3_ba0_sa1_cached) {
test_fp32_d3_3_3_ba0_sa1<float>(true);
}
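
Two things are worth noting about this hunk. First, the extracted bodies are templated on `T` even though only `float` instantiations appear here; the test logic is written against `T` (`mem_lock<T>`, `std::vector<T>`), presumably so that other precisions can reuse the same body later. Second, because every caching variant in this commit carries the `_cached` suffix, the serialization path can be run in isolation with GoogleTest's standard name filter, e.g. `--gtest_filter=*_cached*`, or excluded with `--gtest_filter=-*_cached*`.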

View File

@ -66,7 +66,8 @@ struct roi_align_test : public testing::Test {
void execute(const std::vector<TD>& expected_output,
roi_align::PoolingMode pooling_mode,
roi_align::AlignedMode aligned_mode) const {
roi_align::AlignedMode aligned_mode,
bool is_caching_test) const {
auto& engine = get_test_engine();
auto input = get_memory(engine, input_lt, input_data);
@ -90,12 +91,13 @@ struct roi_align_test : public testing::Test {
aligned_mode));
topology.add(reorder("out", input_info("roi_align"), plain_format, device_data_type));
network network(engine, topology);
network.set_input_data("input", input);
network.set_input_data("coords", coords);
network.set_input_data("roi_ind", roi_ind);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
auto outputs = network.execute();
network->set_input_data("input", input);
network->set_input_data("coords", coords);
network->set_input_data("roi_ind", roi_ind);
auto outputs = network->execute();
auto output = outputs.at("out").get_memory();
cldnn::mem_lock<TD> output_ptr(output, get_test_stream());
@ -158,19 +160,41 @@ TYPED_TEST(roi_align_test, avg_asymmetric) {
using TD = typename TypeParam::DataType;
const std::vector<TD>
expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric);
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric, false);
}
TYPED_TEST(roi_align_test, avg_half_pixel_for_nn) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn);
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn, false);
}
TYPED_TEST(roi_align_test, max_half_pixel) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel);
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, false);
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
TYPED_TEST(roi_align_test, avg_asymmetric_cached) {
using TD = typename TypeParam::DataType;
const std::vector<TD>
expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric, true);
}
TYPED_TEST(roi_align_test, avg_half_pixel_for_nn_cached) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)};
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn, true);
}
#endif
TYPED_TEST(roi_align_test, max_half_pixel_cached) {
using TD = typename TypeParam::DataType;
const std::vector<TD> expected_output =
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, true);
}
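
The cached roi_align variants duplicate the expected-output vectors of the direct-execution tests verbatim, which encodes an implicit requirement: the model-cache round trip is expected to be numerically transparent, since it presumably reloads already-compiled kernels rather than recompiling under different settings. Any divergence between a test and its `_cached` twin would therefore point at the serialization path rather than at the kernels themselves.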

View File

@ -116,7 +116,7 @@ using roi_pooling_test_params = std::tuple<roi_pooling_test_inputs<T>,
template <class T>
struct roi_pooling_gpu_test : public testing::TestWithParam<roi_pooling_test_params<T>> {
public:
void test() {
void test(bool is_caching_test) {
format::type fmt;
pooling_mode mode;
bool position_sensitive;
@ -185,11 +185,12 @@ public:
topology.add(reorder("reordered_roi_pooling", input_info("roi_pooling"), plane_format, type_to_data_type<T>::value));
network network(engine, topology);
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
for (auto& input : inputs) {
network.set_input_data(input.first, input.second);
network->set_input_data(input.first, input.second);
}
const auto outputs = network.execute();
const auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "reordered_roi_pooling");
@ -236,7 +237,11 @@ public:
using roi_pooling_gpu_test_float = roi_pooling_gpu_test<float>;
TEST_P(roi_pooling_gpu_test_float, test) {
ASSERT_NO_FATAL_FAILURE(test());
ASSERT_NO_FATAL_FAILURE(test(false));
}
TEST_P(roi_pooling_gpu_test_float, test_cached) {
ASSERT_NO_FATAL_FAILURE(test(true));
}
const std::vector<roi_pooling_test_inputs<float>> roi_pooling_max_inputs = {

View File

@ -37,7 +37,7 @@ using roll_test_params = std::tuple<roll_test_input<T>, format::type>;
template <class T>
struct roll_test : testing::TestWithParam<roll_test_params<T>> {
void test() {
void test(bool is_caching_test) {
roll_test_input<T> p;
format::type input_format;
std::tie(p, input_format) = testing::TestWithParam<roll_test_params<T>>::GetParam();
@ -54,9 +54,9 @@ struct roll_test : testing::TestWithParam<roll_test_params<T>> {
topology.add(roll("roll", input_info("reordered_input"), tensor(input_format, p.shift)));
topology.add(reorder("reordered_roll", input_info("roll"), plane_format, type_to_data_type<T>::value));
network network(engine, topology);
network.set_input_data("input", input);
const auto outputs = network.execute();
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
network->set_input_data("input", input);
const auto outputs = network->execute();
auto output = outputs.at("reordered_roll").get_memory();
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
@ -226,7 +226,7 @@ std::vector<format::type> formats6d = {format::bfwzyx};
#define INSTANTIATE_ROLL_TEST_SUITE(type, func, formats) \
class roll_test_##type##func : public roll_test<type> {}; \
TEST_P(roll_test_##type##func, roll_##type##func) { \
test(); \
test(false); \
} \
INSTANTIATE_TEST_SUITE_P(roll_smoke_##type##func, \
roll_test_##type##func, \
@ -257,4 +257,33 @@ INSTANTIATE_ROLL_TEST_SUITE(float, getRollFloatingPointAdditionalLogic, {format:
#undef INSTANTIATE_ROLL_TEST_SUITE
#define INSTANTIATE_ROLL_TEST_SUITE_CACHED(type, func) \
TEST_P(roll_test_##type##func, roll_##type##func##_cached) { \
test(true); \
}
#ifdef RUN_ALL_MODEL_CACHING_TESTS
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParamsToCheckLogic)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParamsToCheckLayouts)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParams5D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParams6D)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(FLOAT16, getRollFloatingPointParams)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointParams)
INSTANTIATE_ROLL_TEST_SUITE_CACHED(FLOAT16, getRollFloatingPointAdditionalLogic)
#endif
INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointAdditionalLogic)
#undef INSTANTIATE_ROLL_TEST_SUITE_CACHED
} // namespace
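
Since the cached roll tests are generated by token pasting, one expansion is worth spelling out. `INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointAdditionalLogic)` expands, exactly, to:

TEST_P(roll_test_floatgetRollFloatingPointAdditionalLogic,
       roll_floatgetRollFloatingPointAdditionalLogic_cached) {
    test(true);
}

This reuses the `roll_test_floatgetRollFloatingPointAdditionalLogic` fixture that the earlier `INSTANTIATE_ROLL_TEST_SUITE` macro declared and registered, so the cached run shares the exact parameter sets of the non-cached suite.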

Some files were not shown because too many files have changed in this diff.