[GPU] Model caching unit tests (#15413)
* gpu model caching unit tests
* added serialization unit tests
* added save and load for quantize primitive_inst
* reduced the range of inputs for Gemm tests
* updated the copyright year
This commit is contained in:
parent
d464f38788
commit
a6ff809ad7
@ -43,6 +43,9 @@ public:
|
||||
// Stores the given opaque pointer in _impl_params; it can be read back with getKernlImplParams().
void setKernlImplParams(void* impl_params) { _impl_params = impl_params; }
|
||||
// Returns the current value of _impl_params (the pointer set through setKernlImplParams()).
void* getKernlImplParams() const { return _impl_params; }
|
||||
|
||||
// Returns the current read position of the wrapped std::istream (forwards to stream.tellg()).
std::streampos tellg() { return stream.tellg(); }
|
||||
// Moves the read position of the wrapped std::istream to pos (forwards to stream.seekg()).
void seekg(std::streampos pos) { stream.seekg(pos); }
|
||||
|
||||
private:
|
||||
std::istream& stream;
|
||||
void* _impl_params;
|
||||
|
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include "buffer.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
struct input_info;
|
||||
|
||||
// Serializer specialization that writes an input_info to a buffer.
// It is enabled only when BufferType derives from OutputBuffer<BufferType>.
template <typename BufferType>
|
||||
class Serializer<BufferType, input_info, typename std::enable_if<std::is_base_of<OutputBuffer<BufferType>, BufferType>::value>::type> {
|
||||
public:
|
||||
// Streams input.pid and then input.idx into the buffer.
// The input-side specialization's load() reads the two fields back in the same order.
static void save(BufferType& buffer, const input_info& input) {
|
||||
buffer << input.pid;
|
||||
buffer << input.idx;
|
||||
}
|
||||
};
|
||||
|
||||
// Serializer specialization that reads an input_info from a buffer.
// It is enabled only when BufferType derives from InputBuffer<BufferType>.
template <typename BufferType>
|
||||
class Serializer<BufferType, input_info, typename std::enable_if<std::is_base_of<InputBuffer<BufferType>, BufferType>::value>::type> {
|
||||
public:
|
||||
// Reads input.pid and then input.idx from the buffer, overwriting the fields of `input`.
// This is the same order in which the output-side specialization's save() writes them.
static void load(BufferType& buffer, input_info& input) {
|
||||
buffer >> input.pid;
|
||||
buffer >> input.idx;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include <vector>
|
||||
#include "intel_gpu/graph/serialization/string_serializer.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
@ -74,6 +75,10 @@ struct activation_additional_params {
|
||||
struct activation : public primitive_base<activation> {
|
||||
CLDNN_DECLARE_PRIMITIVE(activation)
|
||||
|
||||
/// @brief Default constructor: passes an empty id string and an empty braced argument to primitive_base.
// NOTE(review): presumably exists so an object can be created first and then filled in by load() - confirm.
activation() : primitive_base("", {}) {}
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION
|
||||
|
||||
/// @brief Constructs Relu primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
@ -137,6 +142,18 @@ struct activation : public primitive_base<activation> {
|
||||
additional_params_input.empty() == rhs_casted.additional_params_input.empty();
|
||||
}
|
||||
|
||||
/// @brief Writes this primitive's activation settings to the binary output buffer.
/// activation_function and additional_params go through make_data with the sizeof of
/// their types; additional_params_input is streamed directly. load() reads the same
/// fields in the same order.
void save(BinaryOutputBuffer& ob) const override {
|
||||
ob << make_data(&activation_function, sizeof(activation_func));
|
||||
ob << make_data(&additional_params, sizeof(activation_additional_params));
|
||||
ob << additional_params_input;
|
||||
}
|
||||
|
||||
/// @brief Restores the activation settings from the binary input buffer.
/// Reads activation_function, additional_params and additional_params_input in the
/// same order save() writes them, overwriting the current member values.
void load(BinaryInputBuffer& ib) override {
|
||||
ib >> make_data(&activation_function, sizeof(activation_func));
|
||||
ib >> make_data(&additional_params, sizeof(activation_additional_params));
|
||||
ib >> additional_params_input;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
if (additional_params_input.empty())
|
||||
|
@ -5,6 +5,9 @@
|
||||
#pragma once
|
||||
#include "primitive.hpp"
|
||||
#include "openvino/op/util/attr_types.hpp"
|
||||
#include "intel_gpu/graph/serialization/input_info_serializer.hpp"
|
||||
#include "intel_gpu/graph/serialization/string_serializer.hpp"
|
||||
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
@ -19,6 +22,10 @@ namespace cldnn {
|
||||
struct arg_max_min : public primitive_base<arg_max_min> {
|
||||
CLDNN_DECLARE_PRIMITIVE(arg_max_min)
|
||||
|
||||
/// @brief Default constructor: passes an empty id string and an empty braced argument to primitive_base.
// NOTE(review): presumably exists so an object can be created first and then filled in by load() - confirm.
arg_max_min() : primitive_base("", {}) {}
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION
|
||||
|
||||
/// @brief Constructs arg_max_min primitive.
|
||||
/// @param id This primitive id.
|
||||
/// @param input Input primitive id.
|
||||
@ -95,5 +102,29 @@ struct arg_max_min : public primitive_base<arg_max_min> {
|
||||
sort == rhs_casted.sort &&
|
||||
values_first == rhs_casted.values_first;
|
||||
}
|
||||
|
||||
/// @brief Number of outputs reported for this primitive: 2 when input_size() is exactly 3,
/// otherwise the value of output_size().
uint32_t get_output_nums() const {
    if (input_size() == 3)
        return 2;
    return output_size();
}
|
||||
/// @brief True when get_output_nums() reports exactly two outputs.
bool has_second_output() const {
    const bool two_outputs = (get_output_nums() == 2);
    return two_outputs;
}
|
||||
/// @brief True unless the primitive has exactly three inputs.
bool use_multiple_outputs() const {
    const bool three_inputs = (input_size() == 3);
    return !three_inputs;
}
|
||||
|
||||
/// @brief Writes this primitive's state to the binary output buffer.
/// Field order: input, num_outputs, mode, top_k, axis, sort, values_first.
/// mode and sort go through make_data with the sizeof of their types; the other
/// fields are streamed directly. load() reads the fields back in the same order.
void save(BinaryOutputBuffer& ob) const override {
|
||||
ob << input;
|
||||
ob << num_outputs;
|
||||
ob << make_data(&mode, sizeof(ov::op::TopKMode));
|
||||
ob << top_k;
|
||||
ob << axis;
|
||||
ob << make_data(&sort, sizeof(ov::op::TopKSortType));
|
||||
ob << values_first;
|
||||
}
|
||||
|
||||
/// @brief Restores this primitive's state from the binary input buffer.
/// Reads input, num_outputs, mode, top_k, axis, sort and values_first in the same
/// order save() writes them, overwriting the current member values.
void load(BinaryInputBuffer& ib) override {
|
||||
ib >> input;
|
||||
ib >> num_outputs;
|
||||
ib >> make_data(&mode, sizeof(ov::op::TopKMode));
|
||||
ib >> top_k;
|
||||
ib >> axis;
|
||||
ib >> make_data(&sort, sizeof(ov::op::TopKSortType));
|
||||
ib >> values_first;
|
||||
}
|
||||
};
|
||||
} // namespace cldnn
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "activation.hpp"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "intel_gpu/graph/serialization/string_serializer.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
@ -189,6 +190,11 @@ protected:
|
||||
|
||||
struct lstm_gemm : public primitive_base<lstm_gemm> {
|
||||
CLDNN_DECLARE_PRIMITIVE(lstm_gemm)
|
||||
|
||||
/// @brief Default constructor: passes an empty id string and an empty braced argument to primitive_base.
// NOTE(review): presumably exists so an object can be created first and then filled in by load() - confirm.
lstm_gemm() : primitive_base("", {}) {}
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION
|
||||
|
||||
/// @brief Constructs lstm layer.
|
||||
/// @param id This primitive id.
|
||||
/// @param input input primitive id.
|
||||
@ -242,6 +248,22 @@ struct lstm_gemm : public primitive_base<lstm_gemm> {
|
||||
hidden.empty() == rhs_casted.hidden.empty();
|
||||
}
|
||||
|
||||
/// @brief Writes this primitive's state to the binary output buffer.
/// Field order: weights, recurrent, bias, hidden, direction.
/// load() reads the fields back in the same order.
void save(BinaryOutputBuffer& ob) const override {
|
||||
ob << weights;
|
||||
ob << recurrent;
|
||||
ob << bias;
|
||||
ob << hidden;
|
||||
ob << direction;
|
||||
}
|
||||
|
||||
/// @brief Restores this primitive's state from the binary input buffer.
/// Reads weights, recurrent, bias, hidden and direction in the same order save()
/// writes them, overwriting the current member values.
void load(BinaryInputBuffer& ib) override {
|
||||
ib >> weights;
|
||||
ib >> recurrent;
|
||||
ib >> bias;
|
||||
ib >> hidden;
|
||||
ib >> direction;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
@ -257,6 +279,11 @@ protected:
|
||||
|
||||
struct lstm_elt : public primitive_base<lstm_elt> {
|
||||
CLDNN_DECLARE_PRIMITIVE(lstm_elt)
|
||||
|
||||
/// @brief Default constructor: passes an empty id string and an empty braced argument to primitive_base.
// NOTE(review): presumably exists so an object can be created first and then filled in by load() - confirm.
lstm_elt() : primitive_base("", {}) {}
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION
|
||||
|
||||
using vec_activation = std::vector<activation_func>;
|
||||
using vec_activation_param = std::vector<activation_additional_params>;
|
||||
|
||||
@ -342,6 +369,22 @@ struct lstm_elt : public primitive_base<lstm_elt> {
|
||||
#undef cmp_fields
|
||||
}
|
||||
|
||||
/// @brief Writes this primitive's state to the binary output buffer.
/// Field order: cell, clip, input_forget, offset_order, direction.
/// offset_order goes through make_data with sizeof(lstm_weights_order).
/// load() reads the fields back in the same order.
// NOTE(review): the struct declares vec_activation / vec_activation_param aliases; if members
// of those types exist they are not written here - confirm that this is intended.
void save(BinaryOutputBuffer& ob) const override {
|
||||
ob << cell;
|
||||
ob << clip;
|
||||
ob << input_forget;
|
||||
ob << make_data(&offset_order, sizeof(lstm_weights_order));
|
||||
ob << direction;
|
||||
}
|
||||
|
||||
/// @brief Restores this primitive's state from the binary input buffer.
/// Reads cell, clip, input_forget, offset_order and direction in the same order
/// save() writes them, overwriting the current member values.
// NOTE(review): no vec_activation / vec_activation_param data is read here - confirm that this is intended.
void load(BinaryInputBuffer& ib) override {
|
||||
ib >> cell;
|
||||
ib >> clip;
|
||||
ib >> input_forget;
|
||||
ib >> make_data(&offset_order, sizeof(lstm_weights_order));
|
||||
ib >> direction;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
std::vector<std::reference_wrapper<const primitive_id>> ret;
|
||||
|
@ -6,6 +6,8 @@
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include <vector>
|
||||
#include "intel_gpu/graph/serialization/string_serializer.hpp"
|
||||
#include "intel_gpu/graph/serialization/vector_serializer.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
@ -24,6 +26,10 @@ enum class reorder_mean_mode {
|
||||
struct reorder : public primitive_base<reorder> {
|
||||
CLDNN_DECLARE_PRIMITIVE(reorder)
|
||||
|
||||
/// @brief Default constructor: passes an empty id string and an empty braced argument to
/// primitive_base and initializes output_format to format::any.
// NOTE(review): presumably exists so an object can be created first and then filled in by load() - confirm.
reorder() : primitive_base("", {}), output_format(format::any) {}
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION
|
||||
|
||||
/// @brief reorder memory types
|
||||
enum class memory_type {
|
||||
buffer,
|
||||
@ -178,6 +184,24 @@ struct reorder : public primitive_base<reorder> {
|
||||
mean.empty() == rhs_casted.mean.empty();
|
||||
}
|
||||
|
||||
/// @brief Writes this primitive's state to the binary output buffer.
/// Field order: output_format, mean, subtract_per_feature, mean_mode, input_mem_type, truncate.
/// output_format, mean_mode and input_mem_type go through make_data with the sizeof of
/// their types; the other fields are streamed directly. load() reads the same order.
void save(BinaryOutputBuffer& ob) const override {
|
||||
ob << make_data(&output_format, sizeof(format));
|
||||
ob << mean;
|
||||
ob << subtract_per_feature;
|
||||
ob << make_data(&mean_mode, sizeof(reorder_mean_mode));
|
||||
ob << make_data(&input_mem_type, sizeof(memory_type));
|
||||
ob << truncate;
|
||||
}
|
||||
|
||||
/// @brief Restores this primitive's state from the binary input buffer.
/// Reads output_format, mean, subtract_per_feature, mean_mode, input_mem_type and
/// truncate in the same order save() writes them, overwriting the current member values.
void load(BinaryInputBuffer& ib) override {
|
||||
ib >> make_data(&output_format, sizeof(format));
|
||||
ib >> mean;
|
||||
ib >> subtract_per_feature;
|
||||
ib >> make_data(&mean_mode, sizeof(reorder_mean_mode));
|
||||
ib >> make_data(&input_mem_type, sizeof(memory_type));
|
||||
ib >> truncate;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override {
|
||||
if (mean.empty())
|
||||
|
@ -13,6 +13,10 @@ namespace cldnn {
|
||||
struct roi_pooling : public primitive_base<roi_pooling> {
|
||||
CLDNN_DECLARE_PRIMITIVE(roi_pooling)
|
||||
|
||||
/// @brief Default constructor: passes an empty id string and an empty braced argument to primitive_base.
// NOTE(review): presumably exists so an object can be created first and then filled in by load() - confirm.
roi_pooling() : primitive_base("", {}) {}
|
||||
|
||||
DECLARE_OBJECT_TYPE_SERIALIZATION
|
||||
|
||||
roi_pooling(const primitive_id& id,
|
||||
const input_info& input_data,
|
||||
const input_info& input_rois,
|
||||
@ -118,6 +122,36 @@ struct roi_pooling : public primitive_base<roi_pooling> {
|
||||
cmp_fields(spatial_bins_y);
|
||||
#undef cmp_fields
|
||||
}
|
||||
|
||||
/// @brief Writes this primitive's state to the binary output buffer.
/// Field order: mode, position_sensitive, pooled_width, pooled_height, spatial_scale,
/// trans_std, no_trans, output_dim, part_size, group_size, spatial_bins_x, spatial_bins_y.
/// mode goes through make_data with sizeof(pooling_mode); the other fields are streamed
/// directly. load() reads the fields back in the same order.
void save(BinaryOutputBuffer& ob) const override {
|
||||
ob << make_data(&mode, sizeof(pooling_mode));
|
||||
ob << position_sensitive;
|
||||
ob << pooled_width;
|
||||
ob << pooled_height;
|
||||
ob << spatial_scale;
|
||||
ob << trans_std;
|
||||
ob << no_trans;
|
||||
ob << output_dim;
|
||||
ob << part_size;
|
||||
ob << group_size;
|
||||
ob << spatial_bins_x;
|
||||
ob << spatial_bins_y;
|
||||
}
|
||||
|
||||
/// @brief Restores this primitive's state from the binary input buffer.
/// Reads mode, position_sensitive, pooled_width, pooled_height, spatial_scale, trans_std,
/// no_trans, output_dim, part_size, group_size, spatial_bins_x and spatial_bins_y in the
/// same order save() writes them, overwriting the current member values.
void load(BinaryInputBuffer& ib) override {
|
||||
ib >> make_data(&mode, sizeof(pooling_mode));
|
||||
ib >> position_sensitive;
|
||||
ib >> pooled_width;
|
||||
ib >> pooled_height;
|
||||
ib >> spatial_scale;
|
||||
ib >> trans_std;
|
||||
ib >> no_trans;
|
||||
ib >> output_dim;
|
||||
ib >> part_size;
|
||||
ib >> group_size;
|
||||
ib >> spatial_bins_x;
|
||||
ib >> spatial_bins_y;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cldnn
|
||||
|
@ -114,58 +114,6 @@ struct kernel_arguments_data {
|
||||
const scalars_desc* scalars = nullptr;
|
||||
};
|
||||
|
||||
// Index-based description of kernel arguments: every slot holds an int32_t index
// rather than a memory pointer. get_arguments_by_idx() resolves the single-value slots
// through instance.dep_memory_ptr() only when the stored index is >= 0, and
// primitive_inst::convert_args() stores -1 for an argument whose pointer is null.
struct kernel_arguments_data_idx {
|
||||
std::vector<int32_t> inputs;
|
||||
int32_t weights;
|
||||
int32_t recurrent;
|
||||
int32_t hidden;
|
||||
int32_t cell;
|
||||
int32_t bias;
|
||||
int32_t weights_zero_points;
|
||||
int32_t activations_zero_points;
|
||||
int32_t compensation;
|
||||
int32_t lookup_table;
|
||||
int32_t scale_table;
|
||||
int32_t slope;
|
||||
|
||||
std::vector<int32_t> fused_op_inputs;
|
||||
// Not written by save() nor read by load() below.
scalars_desc scalars;
|
||||
|
||||
// Streams every index member into ob, in declaration order (inputs first,
// fused_op_inputs last). scalars is not written.
template <typename BufferType>
|
||||
void save(BufferType& ob) const {
|
||||
ob << inputs;
|
||||
ob << weights;
|
||||
ob << recurrent;
|
||||
ob << hidden;
|
||||
ob << cell;
|
||||
ob << bias;
|
||||
ob << weights_zero_points;
|
||||
ob << activations_zero_points;
|
||||
ob << compensation;
|
||||
ob << lookup_table;
|
||||
ob << scale_table;
|
||||
ob << slope;
|
||||
ob << fused_op_inputs;
|
||||
}
|
||||
|
||||
// Reads every index member from ib in the same order save() writes them.
// scalars is left untouched.
template <typename BufferType>
|
||||
void load(BufferType& ib) {
|
||||
ib >> inputs;
|
||||
ib >> weights;
|
||||
ib >> recurrent;
|
||||
ib >> hidden;
|
||||
ib >> cell;
|
||||
ib >> bias;
|
||||
ib >> weights_zero_points;
|
||||
ib >> activations_zero_points;
|
||||
ib >> compensation;
|
||||
ib >> lookup_table;
|
||||
ib >> scale_table;
|
||||
ib >> slope;
|
||||
ib >> fused_op_inputs;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// KernelString
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -31,7 +31,6 @@ public:
|
||||
|
||||
// No-op override: the kernels cache argument is ignored.
void init_kernels(const kernels_cache&) override {}
|
||||
// No-op override: nothing is done with the primitive instance.
void set_arguments(primitive_inst& /*instance*/) override {}
|
||||
// No-op override of the index-based overload: the argument is ignored.
void set_arguments(kernel_arguments_data_idx& /*instance*/) override {}
|
||||
kernel_arguments_data get_arguments(const primitive_inst& /*instance*/) const override {
|
||||
kernel_arguments_data args;
|
||||
return args;
|
||||
|
@ -163,3 +163,4 @@ attach_activation_impl::attach_activation_impl() {
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::activation_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::activation)
|
||||
|
@ -51,7 +51,7 @@ protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<arg_max_min>& instance) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance);
|
||||
|
||||
if (instance.node->has_second_output()) {
|
||||
if (instance.get_typed_desc<arg_max_min>()->has_second_output()) {
|
||||
if (args.inputs.size() > 1) {
|
||||
args.inputs.erase(args.inputs.begin() + 1); // erase constant input in case of TOP_K
|
||||
}
|
||||
@ -138,3 +138,4 @@ attach_arg_max_min_impl::attach_arg_max_min_impl() {
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::arg_max_min_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::arg_max_min)
|
||||
|
@ -102,3 +102,4 @@ attach_lstm_elt_impl::attach_lstm_elt_impl() {
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_elt_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::lstm_elt)
|
||||
|
@ -103,3 +103,4 @@ attach_lstm_gemm_impl::attach_lstm_gemm_impl() {
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::lstm_gemm_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::lstm_gemm)
|
||||
|
@ -31,19 +31,19 @@ protected:
|
||||
args.inputs.push_back(instance.input_memory_ptr(i));
|
||||
}
|
||||
|
||||
if (instance.has_num_select_per_class() && !instance.node->num_select_per_class_node().is_constant()) {
|
||||
if (instance.has_num_select_per_class() && !instance.num_select_per_class_inst()->is_constant()) {
|
||||
args.inputs.push_back(instance.num_select_per_class_mem());
|
||||
}
|
||||
|
||||
if (instance.has_iou_threshold() && !instance.node->iou_threshold_node().is_constant()) {
|
||||
if (instance.has_iou_threshold() && !instance.iou_threshold_inst()->is_constant()) {
|
||||
args.inputs.push_back(instance.iou_threshold_mem());
|
||||
}
|
||||
|
||||
if (instance.has_score_threshold() && !instance.node->score_threshold_node().is_constant()) {
|
||||
if (instance.has_score_threshold() && !instance.score_threshold_inst()->is_constant()) {
|
||||
args.inputs.push_back(instance.score_threshold_mem());
|
||||
}
|
||||
|
||||
if (instance.has_soft_nms_sigma() && !instance.node->soft_nms_sigma_node().is_constant()) {
|
||||
if (instance.has_soft_nms_sigma() && !instance.soft_nms_sigma_inst()->is_constant()) {
|
||||
args.inputs.push_back(instance.soft_nms_sigma_mem());
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
kernel_selector::kernel_data _kernel_data;
|
||||
std::vector<kernel_id> _kernel_ids;
|
||||
std::vector<kernel::ptr> _kernels;
|
||||
kernel_arguments_data_idx _kernel_args;
|
||||
|
||||
typed_primitive_impl_ocl() : _kernel_data({}), _kernel_ids({}), _kernels({}) {
|
||||
_kernel_data.weightsReorderParams.engine = kernel_selector::generic_kernel_params::Engine::NONE;
|
||||
@ -75,7 +74,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
ob << _kernel_data.internalBufferSizes;
|
||||
ob << _kernel_data.kernels;
|
||||
ob << _kernel_ids;
|
||||
ob << _kernel_args;
|
||||
}
|
||||
|
||||
void load(BinaryInputBuffer& ib) override {
|
||||
@ -83,7 +81,6 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
ib >> _kernel_data.internalBufferSizes;
|
||||
ib >> _kernel_data.kernels;
|
||||
ib >> _kernel_ids;
|
||||
ib >> _kernel_args;
|
||||
}
|
||||
|
||||
template<typename ImplType>
|
||||
@ -126,38 +123,6 @@ protected:
|
||||
return args;
|
||||
}
|
||||
|
||||
// Builds a kernel_arguments_data for `instance` from the indices stored in _kernel_args.
// Every stored index is resolved to memory through instance.dep_memory_ptr().
// Single-value slots are resolved only when their index is >= 0; otherwise the
// default-constructed value of the slot is kept. Outputs are taken from the instance.
kernel_arguments_data get_arguments_by_idx(const typed_primitive_inst<PType>& instance) const {
|
||||
kernel_arguments_data args;
|
||||
|
||||
// Inputs: one dependency lookup per stored index.
for (uint32_t i = 0; i < _kernel_args.inputs.size(); i++) {
|
||||
args.inputs.push_back(instance.dep_memory_ptr(_kernel_args.inputs[i]));
|
||||
}
|
||||
|
||||
// Single-value slots: a negative index leaves the slot at its default value.
args.weights = (_kernel_args.weights >= 0) ? instance.dep_memory_ptr(_kernel_args.weights) : args.weights;
|
||||
args.recurrent = (_kernel_args.recurrent >= 0) ? instance.dep_memory_ptr(_kernel_args.recurrent) : args.recurrent;
|
||||
args.hidden = (_kernel_args.hidden >= 0) ? instance.dep_memory_ptr(_kernel_args.hidden) : args.hidden;
|
||||
args.cell = (_kernel_args.cell >= 0) ? instance.dep_memory_ptr(_kernel_args.cell) : args.cell;
|
||||
args.bias = (_kernel_args.bias >= 0) ? instance.dep_memory_ptr(_kernel_args.bias) : args.bias;
|
||||
args.weights_zero_points = (_kernel_args.weights_zero_points >= 0) ?
|
||||
instance.dep_memory_ptr(_kernel_args.weights_zero_points) : args.weights_zero_points;
|
||||
args.activations_zero_points = (_kernel_args.activations_zero_points >= 0) ?
|
||||
instance.dep_memory_ptr(_kernel_args.activations_zero_points) : args.activations_zero_points;
|
||||
args.compensation = (_kernel_args.compensation >= 0) ? instance.dep_memory_ptr(_kernel_args.compensation) : args.compensation;
|
||||
args.lookup_table = (_kernel_args.lookup_table >= 0) ? instance.dep_memory_ptr(_kernel_args.lookup_table) : args.lookup_table;
|
||||
args.scale_table = (_kernel_args.scale_table >= 0) ? instance.dep_memory_ptr(_kernel_args.scale_table) : args.scale_table;
|
||||
args.slope = (_kernel_args.slope >= 0) ? instance.dep_memory_ptr(_kernel_args.slope) : args.slope;
|
||||
|
||||
// Fused-op inputs: one dependency lookup per stored index.
for (size_t i = 0; i < _kernel_args.fused_op_inputs.size(); i++) {
|
||||
args.fused_op_inputs.push_back(instance.dep_memory_ptr(_kernel_args.fused_op_inputs[i]));
|
||||
}
|
||||
|
||||
// Outputs come straight from the instance's output memories, not from stored indices.
for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
|
||||
args.outputs.push_back(instance.output_memory_ptr(i));
|
||||
}
|
||||
|
||||
return args;
|
||||
}
|
||||
|
||||
event::ptr aggregate_events(const std::vector<event::ptr>& events, stream& stream, bool group = false, bool is_output = false) const {
|
||||
if (events.size() == 1 && !is_output)
|
||||
return events[0];
|
||||
@ -211,31 +176,21 @@ protected:
|
||||
stream& stream = instance.get_network().get_stream();
|
||||
size_t k_idx = 0;
|
||||
for (size_t kd_idx = 0; kd_idx < _kernel_data.kernels.size(); ++kd_idx) {
|
||||
kernel_arguments_data args;
|
||||
if (_kernel_data.kernels[kd_idx].skip_execution) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (_kernel_args.inputs.size() > 0) {
|
||||
args = get_arguments_by_idx(instance);
|
||||
} else {
|
||||
args = get_arguments(instance);
|
||||
}
|
||||
auto args = get_arguments(instance);
|
||||
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
|
||||
|
||||
for (const auto& m : instance.get_intermediates_memories()) {
|
||||
args.intermediates.push_back(m);
|
||||
}
|
||||
|
||||
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
|
||||
|
||||
stream.set_arguments(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args);
|
||||
}
|
||||
}
|
||||
|
||||
void set_arguments_impl(kernel_arguments_data_idx& args_idx) override {
|
||||
this->_kernel_args = args_idx;
|
||||
}
|
||||
|
||||
kernel_arguments_data get_arguments_impl(const typed_primitive_inst<PType>& instance) const override {
|
||||
for (size_t k = 0; k < _kernels.size(); ++k) {
|
||||
auto args = get_arguments(instance);
|
||||
@ -274,20 +229,13 @@ protected:
|
||||
is_output_event = instance.is_output_event();
|
||||
}
|
||||
|
||||
kernel_arguments_data args;
|
||||
|
||||
if (_kernel_args.inputs.size() > 0) {
|
||||
args = get_arguments_by_idx(instance);
|
||||
} else {
|
||||
args = get_arguments(instance);
|
||||
|
||||
for (const auto& m : instance.get_intermediates_memories()) {
|
||||
args.intermediates.push_back(m);
|
||||
}
|
||||
}
|
||||
|
||||
auto args = get_arguments(instance);
|
||||
args.scalars = &_kernel_data.kernels[kd_idx].params.scalars;
|
||||
|
||||
for (const auto& m : instance.get_intermediates_memories()) {
|
||||
args.intermediates.push_back(m);
|
||||
}
|
||||
|
||||
auto ev = stream.enqueue_kernel(*_kernels[k_idx++], _kernel_data.kernels[kd_idx].params, args, tmp_events, is_output_event);
|
||||
new_events.push_back(ev);
|
||||
all_events.push_back(ev);
|
||||
|
@ -34,8 +34,8 @@ protected:
|
||||
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
|
||||
args.inputs.push_back(instance.input_memory_ptr(i));
|
||||
}
|
||||
if (instance.node->get_scale_shift_opt()) {
|
||||
if (instance.node->get_dependencies().size() == 9) {
|
||||
if (instance.scale_shift_opt) {
|
||||
if (instance.dependencies().size() == 9) {
|
||||
args.inputs.push_back(instance.dep_memory_ptr(5));
|
||||
args.inputs.push_back(instance.dep_memory_ptr(6));
|
||||
args.inputs.push_back(instance.dep_memory_ptr(7));
|
||||
|
@ -140,3 +140,4 @@ attach_reorder_impl::attach_reorder_impl() {
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::reorder_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::reorder)
|
||||
|
@ -49,7 +49,7 @@ protected:
|
||||
kernel_arguments_data get_arguments(const typed_primitive_inst<roi_pooling>& instance) const override {
|
||||
kernel_arguments_data args;
|
||||
|
||||
if (instance.argument->mode == pooling_mode::deformable_bilinear && !instance.argument->no_trans)
|
||||
if (instance.get_typed_desc<roi_pooling>()->mode == pooling_mode::deformable_bilinear && !instance.get_typed_desc<roi_pooling>()->no_trans)
|
||||
args.inputs = {
|
||||
instance.input_memory_ptr(),
|
||||
instance.rois_memory(),
|
||||
@ -109,3 +109,4 @@ attach_roi_pooling_impl::attach_roi_pooling_impl() {
|
||||
} // namespace cldnn
|
||||
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::ocl::roi_pooling_impl)
|
||||
BIND_BINARY_BUFFER_WITH_TYPE(cldnn::roi_pooling)
|
||||
|
@ -58,7 +58,7 @@ public:
|
||||
|
||||
memory::ptr slope_memory() const { return dep_memory_ptr(1); }
|
||||
|
||||
bool is_parameterized() const { return !argument->additional_params_input.empty(); }
|
||||
bool is_parameterized() const { return !get_typed_desc<activation>()->additional_params_input.empty(); }
|
||||
};
|
||||
|
||||
using activation_inst = typed_primitive_inst<activation>;
|
||||
|
@ -19,12 +19,6 @@ public:
|
||||
typed_program_node(std::shared_ptr<primitive> prim, program& prog) : parent(prim, prog) {}
|
||||
program_node& input() const { return get_dependency(0); }
|
||||
|
||||
uint32_t get_output_nums() const {
|
||||
return (get_primitive()->input_size() == 3 ? 2 : get_primitive()->output_size());
|
||||
}
|
||||
bool has_second_output() const { return get_output_nums() == 2; }
|
||||
bool use_multiple_outputs() const { return get_primitive()->input_size() != 3; }
|
||||
|
||||
std::vector<size_t> get_shape_infer_dependencies() const override { return {1}; }
|
||||
};
|
||||
|
||||
|
@ -45,16 +45,16 @@ public:
|
||||
typed_primitive_inst(network& network, lstm_elt_node const& node);
|
||||
|
||||
memory::ptr cell_memory() const { return dep_memory_ptr(1); }
|
||||
bool cell_term() const { return !argument->cell.empty(); }
|
||||
lstm_weights_order offset_order() const { return argument->offset_order; }
|
||||
bool cell_term() const { return !get_typed_desc<lstm_elt>()->cell.empty(); }
|
||||
lstm_weights_order offset_order() const { return get_typed_desc<lstm_elt>()->offset_order; }
|
||||
float clip() const {
|
||||
float clip_val = argument->clip;
|
||||
float clip_val = get_typed_desc<lstm_elt>()->clip;
|
||||
if (clip_val < 0)
|
||||
throw std::range_error("Clip value < 0");
|
||||
return clip_val;
|
||||
}
|
||||
bool input_forget() const { return argument->input_forget; }
|
||||
uint32_t direction() const { return argument->direction; }
|
||||
bool input_forget() const { return get_typed_desc<lstm_elt>()->input_forget; }
|
||||
uint32_t direction() const { return get_typed_desc<lstm_elt>()->direction; }
|
||||
};
|
||||
|
||||
using lstm_elt_inst = typed_primitive_inst<lstm_elt>;
|
||||
|
@ -44,9 +44,9 @@ public:
|
||||
memory::ptr recurrent_memory() const { return dep_memory_ptr(2); }
|
||||
memory::ptr bias_memory() const { return dep_memory_ptr(3); }
|
||||
memory::ptr hidden_memory() const { return bias_term() ? dep_memory_ptr(4) : dep_memory_ptr(3); }
|
||||
bool bias_term() const { return !argument->bias.empty(); }
|
||||
bool hidden_term() const { return !argument->hidden.empty(); }
|
||||
uint32_t direction() const { return argument->direction; }
|
||||
bool bias_term() const { return !get_typed_desc<lstm_gemm>()->bias.empty(); }
|
||||
bool hidden_term() const { return !get_typed_desc<lstm_gemm>()->hidden.empty(); }
|
||||
uint32_t direction() const { return get_typed_desc<lstm_gemm>()->direction; }
|
||||
};
|
||||
|
||||
using lstm_gemm_inst = typed_primitive_inst<lstm_gemm>;
|
||||
|
@ -64,10 +64,10 @@ public:
|
||||
typed_primitive_inst(network& network, const multiclass_nms_node& node) : parent(network, node) {}
|
||||
|
||||
memory::ptr output_indices_memory() const {
|
||||
return dep_memory_ptr(node->get_dependencies().size() - 2);
|
||||
return dep_memory_ptr(dependencies().size() - 2);
|
||||
}
|
||||
memory::ptr output_num_memory() const {
|
||||
return dep_memory_ptr(node->get_dependencies().size() - 1);
|
||||
return dep_memory_ptr(dependencies().size() - 1);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -91,6 +91,27 @@ class typed_primitive_inst<non_max_suppression> : public typed_primitive_inst_ba
|
||||
using parent = typed_primitive_inst_base<non_max_suppression>;
|
||||
using parent::parent;
|
||||
|
||||
// Dependency index of the IoU-threshold input. Counting starts at index 2 (the slot
// num_select_per_class_mem() reads) and moves one further when num_select_per_class is present.
size_t get_iou_threshold_offset() const {
    const size_t first_slot = 2;
    return first_slot + static_cast<size_t>(has_num_select_per_class());
}
|
||||
|
||||
// Dependency index of the score-threshold input: index 2 plus one for each of
// num_select_per_class and iou_threshold that is present.
size_t get_score_threshold_offset() const {
    const size_t first_slot = 2;
    const size_t preceding = static_cast<size_t>(has_num_select_per_class()) +
                             static_cast<size_t>(has_iou_threshold());
    return first_slot + preceding;
}
|
||||
|
||||
// Dependency index of the soft-NMS-sigma input: index 2 plus one for each of
// num_select_per_class, iou_threshold and score_threshold that is present.
size_t get_soft_nms_sigma_offset() const {
    const size_t first_slot = 2;
    const size_t preceding = static_cast<size_t>(has_num_select_per_class()) +
                             static_cast<size_t>(has_iou_threshold()) +
                             static_cast<size_t>(has_score_threshold());
    return first_slot + preceding;
}
|
||||
|
||||
public:
|
||||
typed_primitive_inst(network& network, non_max_suppression_node const& node)
|
||||
: parent(network, node)
|
||||
@ -113,29 +134,32 @@ public:
|
||||
memory::ptr num_select_per_class_mem() const {
|
||||
return dep_memory_ptr(2);
|
||||
}
|
||||
std::shared_ptr<const primitive_inst> num_select_per_class_inst() const {
|
||||
return dependencies().at(2).first;
|
||||
}
|
||||
|
||||
bool has_iou_threshold() const { return !get_typed_desc<non_max_suppression>()->iou_threshold.empty(); }
|
||||
memory::ptr iou_threshold_mem() const {
|
||||
size_t offset = 2;
|
||||
offset += has_num_select_per_class();
|
||||
return dep_memory_ptr(offset);
|
||||
return dep_memory_ptr(get_iou_threshold_offset());
|
||||
}
|
||||
std::shared_ptr<const primitive_inst> iou_threshold_inst() const {
|
||||
return dependencies().at(get_iou_threshold_offset()).first;
|
||||
}
|
||||
|
||||
bool has_score_threshold() const { return !get_typed_desc<non_max_suppression>()->score_threshold.empty(); }
|
||||
memory::ptr score_threshold_mem() const {
|
||||
size_t offset = 2;
|
||||
offset += has_num_select_per_class();
|
||||
offset += has_iou_threshold();
|
||||
return dep_memory_ptr(offset);
|
||||
return dep_memory_ptr(get_score_threshold_offset());
|
||||
}
|
||||
std::shared_ptr<const primitive_inst> score_threshold_inst() const {
|
||||
return dependencies().at(get_score_threshold_offset()).first;
|
||||
}
|
||||
|
||||
bool has_soft_nms_sigma() const { return !get_typed_desc<non_max_suppression>()->soft_nms_sigma.empty(); }
|
||||
memory::ptr soft_nms_sigma_mem() const {
|
||||
size_t offset = 2;
|
||||
offset += has_num_select_per_class();
|
||||
offset += has_iou_threshold();
|
||||
offset += has_score_threshold();
|
||||
return dep_memory_ptr(offset);
|
||||
return dep_memory_ptr(get_soft_nms_sigma_offset());
|
||||
}
|
||||
std::shared_ptr<const primitive_inst> soft_nms_sigma_inst() const {
|
||||
return dependencies().at(get_soft_nms_sigma_offset()).first;
|
||||
}
|
||||
|
||||
bool has_second_output() const { return !get_typed_desc<non_max_suppression>()->second_output.empty(); }
|
||||
|
@ -49,7 +49,6 @@ struct primitive_impl {
|
||||
virtual void set_node_params(const program_node&) {}
|
||||
virtual std::string get_type() const = 0;
|
||||
virtual void set_arguments(primitive_inst& instance) = 0;
|
||||
virtual void set_arguments(kernel_arguments_data_idx& args_idx) = 0;
|
||||
virtual kernel_arguments_data get_arguments(const primitive_inst& instance) const = 0;
|
||||
virtual event::ptr execute(const std::vector<event::ptr>& events, primitive_inst& instance) = 0;
|
||||
std::string get_kernel_name() const { return _kernel_name; }
|
||||
@ -288,7 +287,6 @@ protected:
|
||||
memory::ptr allocate_internal_buffer(size_t idx);
|
||||
static std::vector<std::shared_ptr<primitive_inst>> build_exec_deps(
|
||||
std::vector<std::pair<std::shared_ptr<primitive_inst>, int32_t>> const& mem_deps);
|
||||
void convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const;
|
||||
int32_t get_index_in_deps(memory::cptr arg) const;
|
||||
|
||||
// event function called by primitive_inst::execute after checking if primitive should rerun and before calling
|
||||
@ -382,16 +380,11 @@ private:
|
||||
return set_arguments_impl(reinterpret_cast<typed_primitive_inst<PType>&>(instance));
|
||||
}
|
||||
|
||||
void set_arguments(kernel_arguments_data_idx& args_idx) override {
|
||||
return set_arguments_impl(args_idx);
|
||||
}
|
||||
|
||||
kernel_arguments_data get_arguments(const primitive_inst& instance) const override {
|
||||
return get_arguments_impl(reinterpret_cast<const typed_primitive_inst<PType>&>(instance));
|
||||
}
|
||||
|
||||
virtual void set_arguments_impl(typed_primitive_inst<PType>& /*instance*/) {}
|
||||
virtual void set_arguments_impl(kernel_arguments_data_idx& /*args_idx*/) {}
|
||||
virtual kernel_arguments_data get_arguments_impl(const typed_primitive_inst<PType>& /*instance*/) const {
|
||||
kernel_arguments_data args;
|
||||
return args;
|
||||
|
@ -167,8 +167,12 @@ public:
|
||||
static std::vector<layout> calc_output_layouts(quantize_node const& node, kernel_impl_params const& impl_param);
|
||||
static layout calc_output_layout(quantize_node const& node, kernel_impl_params const& impl_param);
|
||||
static std::string to_string(quantize_node const& node);
|
||||
void save(BinaryOutputBuffer& ob) const override;
|
||||
void load(BinaryInputBuffer& ib) override;
|
||||
|
||||
typed_primitive_inst(network& network, quantize_node const& desc);
|
||||
|
||||
bool scale_shift_opt; // This is for serialization. Please do not remove it.
|
||||
};
|
||||
|
||||
using quantize_inst = typed_primitive_inst<quantize>;
|
||||
|
@ -65,7 +65,7 @@ public:
|
||||
memory::ptr mean_nv12_memory() const { return dep_memory_ptr(2); }
|
||||
memory::ptr mean_memory() const { return dep_memory_ptr(1); }
|
||||
|
||||
bool has_mean() const { return !argument->mean.empty(); }
|
||||
bool has_mean() const { return !get_typed_desc<reorder>()->mean.empty(); }
|
||||
|
||||
void update_output_memory() override;
|
||||
bool requires_reinterpret() const { return _req_reinterpr; }
|
||||
|
@ -409,7 +409,8 @@ network::network(cldnn::BinaryInputBuffer& ib, const ExecutionConfig& config, st
|
||||
for (const auto& p_inst : _exec_order) {
|
||||
ib >> *p_inst;
|
||||
_primitives[p_inst->id()] = p_inst;
|
||||
p_inst->init_kernels(kernels_cache);
|
||||
if (p_inst->get_impl() != nullptr)
|
||||
p_inst->init_kernels(kernels_cache);
|
||||
}
|
||||
|
||||
for (auto& item : _primitives) {
|
||||
|
@ -1142,44 +1142,12 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
|
||||
if (_impl != nullptr) {
|
||||
ob << true;
|
||||
kernel_arguments_data args = _impl->get_arguments(*this);
|
||||
kernel_arguments_data_idx args_idx;
|
||||
convert_args(args, args_idx);
|
||||
_impl->set_arguments(args_idx);
|
||||
ob << _impl;
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
}
|
||||
|
||||
void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const {
|
||||
if (args.inputs.size() > 0) {
|
||||
args_idx.inputs.resize(args.inputs.size());
|
||||
for (uint32_t idx = 0; idx < args.inputs.size(); ++idx) {
|
||||
args_idx.inputs[idx] = get_index_in_deps(args.inputs[idx]);
|
||||
}
|
||||
}
|
||||
|
||||
args_idx.weights = (args.weights == nullptr) ? -1 : get_index_in_deps(args.weights);
|
||||
args_idx.recurrent = (args.recurrent == nullptr) ? -1 : get_index_in_deps(args.recurrent);
|
||||
args_idx.hidden = (args.hidden == nullptr) ? -1 : get_index_in_deps(args.hidden);
|
||||
args_idx.cell = (args.cell == nullptr) ? -1 : get_index_in_deps(args.cell);
|
||||
args_idx.bias = (args.bias == nullptr) ? -1 : get_index_in_deps(args.bias);
|
||||
args_idx.weights_zero_points = (args.weights_zero_points == nullptr) ? -1 : get_index_in_deps(args.weights_zero_points);
|
||||
args_idx.activations_zero_points = (args.activations_zero_points == nullptr) ? -1 : get_index_in_deps(args.activations_zero_points);
|
||||
args_idx.compensation = (args.compensation == nullptr) ? -1 : get_index_in_deps(args.compensation);
|
||||
args_idx.lookup_table = (args.lookup_table == nullptr) ? -1 : get_index_in_deps(args.lookup_table);
|
||||
args_idx.scale_table = (args.scale_table == nullptr) ? -1 : get_index_in_deps(args.scale_table);
|
||||
args_idx.slope = (args.slope == nullptr) ? -1 : get_index_in_deps(args.slope);
|
||||
|
||||
if (args.fused_op_inputs.size() > 0) {
|
||||
args_idx.fused_op_inputs.resize(args.fused_op_inputs.size());
|
||||
for (uint32_t idx = 0; idx < args.fused_op_inputs.size(); ++idx) {
|
||||
args_idx.fused_op_inputs[idx] = get_index_in_deps(args.fused_op_inputs[idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int32_t primitive_inst::get_index_in_deps(memory::cptr arg) const {
|
||||
for (uint32_t idx = 0; idx < _deps.size(); ++idx) {
|
||||
if (arg == dep_memory_ptr(idx))
|
||||
|
@ -74,6 +74,17 @@ std::string quantize_inst::to_string(quantize_node const& node) {
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
quantize_inst::typed_primitive_inst(network& network, quantize_node const& node) : parent(network, node) {}
|
||||
quantize_inst::typed_primitive_inst(network& network, quantize_node const& node) : parent(network, node) {
|
||||
scale_shift_opt = node.get_scale_shift_opt();
|
||||
}
|
||||
|
||||
void quantize_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
parent::save(ob);
|
||||
ob << scale_shift_opt;
|
||||
}
|
||||
|
||||
void quantize_inst::load(BinaryInputBuffer& ib) {
|
||||
parent::load(ib);
|
||||
ib >> scale_shift_opt;
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -229,9 +229,10 @@ CompiledModel::CompiledModel(std::istream& networkModel, InferenceEngine::Remote
|
||||
setOutputs(new_results);
|
||||
}
|
||||
|
||||
auto graph_base = std::make_shared<Graph>(ib, context_impl, m_config, 0);
|
||||
auto pos = ib.tellg();
|
||||
for (uint16_t n = 0; n < m_config.get_property(ov::num_streams); n++) {
|
||||
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
|
||||
ib.seekg(pos);
|
||||
auto graph = std::make_shared<Graph>(ib, context_impl, m_config, n);
|
||||
m_graphs.push_back(graph);
|
||||
}
|
||||
}
|
||||
@ -317,10 +318,6 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
|
||||
}
|
||||
|
||||
bool CompiledModel::is_serializable() {
|
||||
// Model with multiple graphs is not yet supported.
|
||||
if (m_graphs.size() != 1)
|
||||
return false;
|
||||
|
||||
// Dynamic model serialization is not yet supported.
|
||||
if (m_graphs[0]->GetNetwork()->is_dynamic())
|
||||
return false;
|
||||
|
@ -84,7 +84,11 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
|
||||
ib >> primitiveIDs;
|
||||
ib >> outputDims;
|
||||
|
||||
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
|
||||
size_t num_networks;
|
||||
ib >> num_networks;
|
||||
for (size_t i = 0; i < num_networks; ++i) {
|
||||
m_networks.emplace_back(std::make_shared<cldnn::network>(ib, get_engine().create_stream(config), get_engine(), m_stream_id));
|
||||
}
|
||||
}
|
||||
|
||||
Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
|
||||
@ -500,9 +504,10 @@ void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
|
||||
ob << primitiveIDs;
|
||||
ob << outputDims;
|
||||
|
||||
auto m_network = m_networks.back();
|
||||
|
||||
m_network->save(ob);
|
||||
ob << m_networks.size();
|
||||
for (auto net : m_networks) {
|
||||
net->save(ob);
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfo() {
|
||||
|
@ -1695,24 +1695,7 @@ struct activation_random_test : testing::TestWithParam<activation_random_test_pa
|
||||
|
||||
ExecutionConfig config{ov::intel_gpu::custom_outputs(std::vector<std::string>{"activation"})};
|
||||
|
||||
std::shared_ptr<cldnn::network> net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
net->set_input_data("in", in_mem);
|
||||
|
||||
|
@ -135,24 +135,7 @@ public:
|
||||
topology.add(adaptive_pooling("adaptive_avg_pooling_blocked", input_info("input_reordered"), params.outputTensor));
|
||||
topology.add(reorder("adaptive_avg_pooling", input_info("adaptive_avg_pooling_blocked"), plain_layout, data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
|
||||
|
@ -162,25 +162,7 @@ public:
|
||||
result_id = reorder_result_id;
|
||||
}
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data(input_data_id, input_mem);
|
||||
|
||||
|
@ -123,24 +123,7 @@ void test_add_reorders_gpu_basic_reshape_and_tile(bool is_caching_test) {
|
||||
set_values(input, input_vec);
|
||||
tile_ref<T>(input, output_ref, 2, 4);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
|
||||
|
@ -701,24 +701,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test)
|
||||
|
||||
set_values(input, input_vec);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
@ -868,24 +851,7 @@ void test_top_k_layer_md_sync(bool is_caching_test) {
|
||||
true));
|
||||
topology.add(mutable_data("arg_max.1", { input_info("arg_max.0") }, shared_memory));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input1", input1);
|
||||
auto outputs = network->execute();
|
||||
|
@ -677,24 +677,7 @@ void test_batch_to_space_fp32_gpu_i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16(bool
|
||||
tensor(format::bfyx, {1,8,3,1}, 1)));
|
||||
topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f32));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("Input", input);
|
||||
|
||||
|
@ -230,24 +230,7 @@ TEST_P(binary_convolution_test, conv) {
|
||||
topology_bin.add(binary_convolution(output_name, input_info(input_name), {output_name + weights_suffix},
|
||||
stride, pad, dilation, os_size, 1, p.pad_value, p.dt));
|
||||
|
||||
cldnn::network::ptr network_bin;
|
||||
|
||||
if (p.is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology_bin, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network_bin = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network_bin = std::make_shared<cldnn::network>(engine, topology_bin, config);
|
||||
}
|
||||
cldnn::network::ptr network_bin = get_network(engine, topology_bin, config, get_test_stream_ptr(), p.is_caching_test);
|
||||
|
||||
network_bin->set_input_data(input_name, input);
|
||||
|
||||
|
@ -85,25 +85,7 @@ public:
|
||||
pad_mode,
|
||||
pad_value),
|
||||
reorder("output", input_info("border"), cldnn::format::bfyx, T_dt));
|
||||
std::shared_ptr<cldnn::network> target_network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, target_topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
target_network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
target_network = std::make_shared<cldnn::network>(engine, target_topology);
|
||||
}
|
||||
|
||||
cldnn::network::ptr target_network = get_network(engine, target_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
target_network->set_input_data("input", input);
|
||||
auto target_output = target_network->execute().at("output").get_memory();
|
||||
cldnn::mem_lock<T> target_output_ptr(target_output, get_test_stream());
|
||||
|
@ -212,24 +212,7 @@ void start_broadcast_test_5d(format cldnn_format, data_types cldnn_data_type, st
|
||||
|
||||
set_values(input, input_data);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
@ -59,24 +59,7 @@ struct bucketize_test : testing::TestWithParam<bucketize_test_params<I, B, O>> {
|
||||
topology.add(
|
||||
reorder("plane_bucketize_left_bound", input_info("bucketize_left_bound"), format::bfyx, type_to_data_type<O>::value));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("buckets", buckets);
|
||||
|
@ -127,24 +127,7 @@ void start_cl_mem_check_2_inputs(bool is_caching_test) {
|
||||
topology.add(input2);
|
||||
topology.add(reorder("reorder", input_info("input"), input_info("input2"), output_layout));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(*engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, *engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), *engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(*engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input_memory);
|
||||
network->set_input_data("input2", input_memory2);
|
||||
|
@ -36,24 +36,7 @@ void exexute_network(cldnn::engine& engine, const ExecutionConfig& cfg, bool is_
|
||||
};
|
||||
set_values(input, input_vec);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, cfg);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, cfg, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, cfg);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
@ -981,24 +981,7 @@ public:
|
||||
topology.add(pooling("pool_final", input_info("conv"), pooling_mode::max, {1, 1}, {1, 1}));
|
||||
topology.add(reorder("reorder", input_info("pool_final"), layout(data_type, format::bfyx, {(int32_t)batch_num, (int32_t)output_f, (int32_t)input_y, (int32_t)input_x})));
|
||||
|
||||
std::shared_ptr<cldnn::network> concat_network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
concat_network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
concat_network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr concat_network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
for (size_t i = 0; i < in_features.size(); i++) {
|
||||
concat_network->set_input_data(input_ids[i], in_memory[i]);
|
||||
|
@ -593,24 +593,7 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test
|
||||
topology.add(convert_color("convert_color", { input_info("input"), input_info("input2"), input_info("input3") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB,
|
||||
cldnn::convert_color::memory_type::image, output_layout));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(*engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, *engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), *engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(*engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input_memory);
|
||||
network->set_input_data("input2", input_memory2);
|
||||
|
@ -9422,24 +9422,7 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1(
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" };
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } }));
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input_mem);
|
||||
|
||||
|
@ -1213,24 +1213,7 @@ TEST_P(crop_gpu, pad_test) {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
@ -104,24 +104,7 @@ public:
|
||||
topology.add(ctc_loss("ctc_loss", inputs_ids, p.preprocess_collapse_repeated, p.ctc_merge_repeated, p.unique));
|
||||
topology.add(reorder("reordered_ctc_loss", input_info("ctc_loss"), plane_format, float_data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
for (auto& input : inputs) {
|
||||
network->set_input_data(std::get<0>(input), std::get<1>(input));
|
||||
|
@ -185,24 +185,7 @@ public:
|
||||
topology.add(input_layout("Input0", input->get_layout()));
|
||||
topology.add(cum_sum("cum_sum", input_info("Input0"), axis, exclusive, reverse));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("Input0", input);
|
||||
|
||||
|
@ -547,24 +547,7 @@ void test_custom_gpu_primitive_u8_add_basic_in2x2x2x2(bool is_caching_test) {
|
||||
2, 60, 0, 20
|
||||
});
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("input2", input2);
|
||||
|
@ -2360,24 +2360,7 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1
|
||||
ov::intel_gpu::ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
|
||||
|
@ -992,24 +992,7 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) {
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input1", input1);
|
||||
|
||||
|
@ -393,24 +393,7 @@ void test_depth_to_space_fp32_gpu_d1822_bs2_depth_first(bool is_caching_test) {
|
||||
depth_to_space("depth_to_space", input_info("Input0"), block_size, depth_to_space_mode::depth_first)
|
||||
);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("Input0", input1);
|
||||
|
||||
|
@ -147,25 +147,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -200,25 +182,7 @@ public:
|
||||
topology.add(detection_output("detection_output_1", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
|
||||
topology.add(detection_output("detection_output_2", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -260,25 +224,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -326,25 +272,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -386,25 +314,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -457,25 +367,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -538,25 +430,7 @@ public:
|
||||
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id
|
||||
));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -606,25 +480,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -685,25 +541,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -751,25 +589,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -820,25 +640,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -884,25 +686,7 @@ public:
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -965,25 +749,7 @@ public:
|
||||
prior_is_normalized, this->img_size, this->img_size
|
||||
));
|
||||
|
||||
ExecutionConfig config;
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology, config);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
|
@ -118,24 +118,7 @@ public:
|
||||
// It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks
|
||||
topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input);
|
||||
const auto outputs = network->execute();
|
||||
|
@ -4155,24 +4155,7 @@ struct eltwise_random_test : testing::TestWithParam<eltwise_random_test_params>
|
||||
ExecutionConfig config_opt;
|
||||
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"eltwise_opt"}));
|
||||
|
||||
std::shared_ptr<cldnn::network> net_opt;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo_opt, config_opt);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
net_opt = std::make_shared<cldnn::network>(ib, config_opt, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net_opt = std::make_shared<cldnn::network>(engine, topo_opt, config_opt);
|
||||
}
|
||||
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
net_opt->set_input_data("input1", input1);
|
||||
net_opt->set_input_data("input2", input2);
|
||||
|
@ -1394,24 +1394,7 @@ void test_embedding_bag_fp32_gpu_extended5_6(bool is_caching_test) {
|
||||
embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape)
|
||||
);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("Input0", emb_table);
|
||||
network->set_input_data("Input1", indices);
|
||||
|
@ -143,24 +143,7 @@ public:
|
||||
const primitive_id eddo_id = "experimental_detectron_detection_output";
|
||||
topology.add(reorder(eddo_id, input_info(b_eddo_primitive) /*b_eddo_id*/, format::bfyx, data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data(input_boxes_id, input_boxes);
|
||||
network->set_input_data(input_deltas_id, input_deltas);
|
||||
|
@ -241,24 +241,7 @@ public:
|
||||
const primitive_id reorder_result_id = edgpsi_id + "Reordered";
|
||||
topology.add(reorder(reorder_result_id, input_info(edgpsi_primitive), format::bfyx, data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data(input_im_info_id, input_im_info);
|
||||
network->set_input_data(input_anchors_id, input_anchors);
|
||||
|
@ -62,24 +62,7 @@ public:
|
||||
params.imageShape.first,
|
||||
params.imageShape.second));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (params.is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), params.is_caching_test);
|
||||
|
||||
network->set_input_data(priors_id, prior_input);
|
||||
|
||||
|
@ -53,24 +53,7 @@ void test_experimental_detectron_roi_feature_extractor_gpu_fp32_one_level(bool i
|
||||
topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs));
|
||||
topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data(input_rois_id, roi_input);
|
||||
network->set_input_data(input_level_1_id, level_1);
|
||||
|
@ -159,22 +159,7 @@ TEST(experimental_detectron_topk_rois_gpu_test, export_import) {
|
||||
rois_num));
|
||||
topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, test_data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
{
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), true);
|
||||
|
||||
network->set_input_data(input_rois_id, roi_input);
|
||||
network->set_input_data(input_indices_id, roi_indices);
|
||||
|
@ -518,24 +518,7 @@ void test_extract_image_patches_gpu_basic5(bool is_caching_test) {
|
||||
topology.add(input_layout("Input0", input->get_layout()));
|
||||
topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("Input0", input);
|
||||
auto outputs = network->execute();
|
||||
|
@ -85,24 +85,7 @@ public:
|
||||
tp.add(reorder("output", input_info("eye"), oupput_fmt, type_to_data_type<OutputType>::value));
|
||||
}
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine_, tp);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine_);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine_);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine_, tp);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine_, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs = network->execute();
|
||||
|
||||
|
@ -30,24 +30,7 @@ inline void DoTest(engine& engine,
|
||||
gather_elements("gather_elements", input_info("InputData"), input_info("InputIndices"), input1->get_layout().format, output_tensor, axis)
|
||||
);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("InputData", input0);
|
||||
network->set_input_data("InputIndices", input1);
|
||||
|
@ -1938,24 +1938,7 @@ void test_gather_gpu_u8_322_axisF(bool is_caching_test) {
|
||||
topology.add(
|
||||
gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1}));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("InputDictionary", input1);
|
||||
network->set_input_data("InputText", input2);
|
||||
|
@ -39,24 +39,7 @@ inline void DoTestBase(engine& engine,
|
||||
topology.add(input_layout("InputIndices", input1->get_layout()));
|
||||
topology.add(gather_nd_inst);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("InputData", input0);
|
||||
network->set_input_data("InputIndices", input1);
|
||||
|
@ -213,24 +213,7 @@ public:
|
||||
const primitive_id reorder_result_id = result_id + "_reordered";
|
||||
topology.add(reorder(reorder_result_id, input_info(result_id), plain_layout, data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data(step_id, step_input);
|
||||
network->set_input_data(parent_id, parent_input);
|
||||
|
@ -95,7 +95,7 @@ protected:
|
||||
|
||||
public:
|
||||
virtual ~GemmGPUTest() {}
|
||||
void test() {
|
||||
void test(bool is_caching_test = false) {
|
||||
|
||||
fill_gemm_params();
|
||||
|
||||
@ -124,12 +124,31 @@ public:
|
||||
tp.add(g);
|
||||
tp.add(reorder("output", input_info("gemm_output"), format::bfyx, data_types::f32));
|
||||
|
||||
network network(engine, tp);
|
||||
for (auto &input : network_inputs) {
|
||||
network.set_input_data(input.first, input.second);
|
||||
cldnn::network::ptr network;
|
||||
if (is_caching_test) {
|
||||
std::cout << "cached" << std::endl;
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, tp);
|
||||
process_program(_network.get_program());
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, tp);
|
||||
process_program(network->get_program());
|
||||
}
|
||||
auto outputs = network.execute();
|
||||
process_program(network.get_program());
|
||||
|
||||
for (auto &input : network_inputs) {
|
||||
network->set_input_data(input.first, input.second);
|
||||
}
|
||||
auto outputs = network->execute();
|
||||
auto output = outputs.at("output").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
|
||||
@ -160,7 +179,7 @@ class GemmGPUTestRandom : public GemmGPUTest {
|
||||
auto &v = input_data[i];
|
||||
v.resize(size);
|
||||
for(size_t i = 0; i < size; ++i) {
|
||||
v[i] = generate_random_value() / 10.f;
|
||||
v[i] = generate_random_value() / 20.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -233,12 +252,13 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(false), ::testing::Values(true),
|
||||
::testing::Values(1.0f), ::testing::Values(0.0f)));
|
||||
|
||||
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
|
||||
template <typename T>
|
||||
void test_basic_bfyx_t2_inplace_crop_with_pad(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 3 } });
|
||||
auto input2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
|
||||
|
||||
std::vector<float> input_data = {
|
||||
std::vector<T> input_data = {
|
||||
1.f, -2.f, 3.f, -4.f,
|
||||
5.f, 6.f, 1.f, 2.f,
|
||||
3.f, 3.f, 2.f, -1.f,
|
||||
@ -248,13 +268,13 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
|
||||
3.f, 3.f, 2.f, -1.f,
|
||||
};
|
||||
|
||||
std::vector<float> input_data2 = {
|
||||
std::vector<T> input_data2 = {
|
||||
2.f, 5.f, -4.f, -7.f,
|
||||
};
|
||||
set_values(input, input_data);
|
||||
set_values(input2, input_data2);
|
||||
|
||||
std::vector<float> out_data = {
|
||||
std::vector<T> out_data = {
|
||||
8.f, 22.f, 20.f
|
||||
};
|
||||
|
||||
@ -274,13 +294,13 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("input2", input2);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("input2", input2);
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("output").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
ASSERT_EQ(output_ptr.size(), (uint32_t)3);
|
||||
for (uint32_t i = 0; i < out_data.size(); ++i) {
|
||||
@ -288,6 +308,10 @@ TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad) {
|
||||
test_basic_bfyx_t2_inplace_crop_with_pad<float>(false);
|
||||
}
|
||||
|
||||
TEST(gemm_gpu, dynamic) {
|
||||
auto& engine = get_test_engine();
|
||||
ov::Shape in1_shape = { 1, 1, 3, 4 };
|
||||
@ -745,7 +769,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
GemmGPUTestRandom,
|
||||
::testing::Combine(
|
||||
::testing::Values(std::vector<std::vector<int32_t>>{{ 5, 1, 500, 9 }, { 5, 1, 1, 500 }}),
|
||||
::testing::Values(std::vector<std::vector<float>>{{}, {}}),
|
||||
::testing::Values(std::vector<std::vector<float>>{{}, {}}),
|
||||
::testing::ValuesIn(planar_formats),
|
||||
::testing::ValuesIn(float_types),
|
||||
::testing::Values(std::vector<float>{}),
|
||||
@ -1182,7 +1206,7 @@ public:
|
||||
return (x % x_size) * x_pitch + (y % y_size) * y_pitch + (f % f_num) * f_pitch + (b % b_num) * b_pitch;
|
||||
}
|
||||
|
||||
void execute(gemm_params& p) {
|
||||
void execute(gemm_params& p, bool is_caching_test = false) {
|
||||
auto& engine = get_test_engine();
|
||||
if (!engine.get_device_info().supports_immad)
|
||||
return;
|
||||
@ -1294,13 +1318,13 @@ public:
|
||||
#endif
|
||||
cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_bfyx", gemm_impl} }));
|
||||
|
||||
network network(engine, topology, cfg);
|
||||
network.set_input_data("input0", input0_mem);
|
||||
network.set_input_data("input1", input1_mem);
|
||||
cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input0", input0_mem);
|
||||
network->set_input_data("input1", input1_mem);
|
||||
if (p.beta != 0) {
|
||||
network.set_input_data("input2", input2_mem);
|
||||
network->set_input_data("input2", input2_mem);
|
||||
}
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
auto output = outputs.at("reorder_bfyx").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
|
||||
@ -1649,4 +1673,50 @@ INSTANTIATE_TEST_SUITE_P(gemm_gpu, gemm_fp16_tiled_nn_broadcast_tests, ::testing
|
||||
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(GemmGPUTest, basic_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(GemmGPUTestRandom, basic_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
TEST_P(gemm_int8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_uint8_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_simple_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_uint8_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_transposition_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_broadcasting_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_uint8_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_combo_tests_onednn, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
#else
|
||||
TEST_P(gemm_int8_transposition_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_leftovers_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_combo_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_int8_slm_combo_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_tiled_nn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_tiled_nt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_tiled_tn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_tiled_tt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp32_tiled_nn_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_tiled_nn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_tiled_nt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_tiled_tn_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_tiled_tt_tests, basic_cached) { auto p = GetParam(); execute(p, true); }
|
||||
TEST_P(gemm_fp16_tiled_nn_broadcast_tests, basic_cached) { auto p = GetParam(); execute(p, true); }  // cached variant: pass true, as every sibling basic_cached test does
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
#endif // RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST(gemm_gpu, basic_bfyx_t2_inplace_crop_with_pad_cached) {
|
||||
test_basic_bfyx_t2_inplace_crop_with_pad<float>(true);
|
||||
}
|
||||
} // namespace
|
||||
|
@ -355,24 +355,7 @@ public:
|
||||
const primitive_id reorder_result_id = generate_proposals_id + "Reordered";
|
||||
topology.add(reorder(reorder_result_id, input_info(generate_proposals_id), format::bfyx, data_type));
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data(input_im_info_id, input_im_info);
|
||||
network->set_input_data(input_anchors_id, input_anchors);
|
||||
|
@ -7,6 +7,12 @@
|
||||
#include "intel_gpu/primitives/grid_sample.hpp"
|
||||
#include "test_utils/test_utils.h"
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
#define RUN_CACHING_TEST false, true
|
||||
#else
|
||||
#define RUN_CACHING_TEST false
|
||||
#endif
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace tests;
|
||||
|
||||
@ -24,7 +30,7 @@ struct grid_sample_test_inputs {
|
||||
};
|
||||
|
||||
template <class TD, class TG>
|
||||
using grid_sample_test_params = std::tuple<grid_sample_test_inputs<TD, TG>, format::type>;
|
||||
using grid_sample_test_params = std::tuple<grid_sample_test_inputs<TD, TG>, format::type, bool>;
|
||||
|
||||
template <class T>
|
||||
float getError();
|
||||
@ -45,7 +51,8 @@ public:
|
||||
void test() {
|
||||
format::type fmt;
|
||||
grid_sample_test_inputs<TD, TG> p;
|
||||
std::tie(p, fmt) = testing::TestWithParam<grid_sample_test_params<TD, TG>>::GetParam();
|
||||
bool is_caching_test;
|
||||
std::tie(p, fmt, is_caching_test) = testing::TestWithParam<grid_sample_test_params<TD, TG>>::GetParam();
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
const auto data_data_type = type_to_data_type<TD>::value;
|
||||
@ -68,10 +75,10 @@ public:
|
||||
topology.add(grid_sample("grid_sample", { input_info("reordered_data"), input_info("reordered_grid") }, p.attributes));
|
||||
topology.add(reorder("plane_grid_sample", input_info("grid_sample"), plane_format, data_data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("data", data);
|
||||
network.set_input_data("grid", grid);
|
||||
const auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("data", data);
|
||||
network->set_input_data("grid", grid);
|
||||
const auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), std::size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "plane_grid_sample");
|
||||
@ -88,11 +95,13 @@ public:
|
||||
static std::string PrintToStringParamName(const testing::TestParamInfo<grid_sample_test_params<TD, TG>>& info) {
|
||||
format::type fmt;
|
||||
grid_sample_test_inputs<TD, TG> p;
|
||||
std::tie(p, fmt) = info.param;
|
||||
bool is_caching_test;
|
||||
std::tie(p, fmt, is_caching_test) = info.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "TestName=" << p.test_name << ";";
|
||||
result << "Format=" << fmt_to_str(fmt);
|
||||
result << "Format=" << fmt_to_str(fmt) << ";";
|
||||
result << "Cached=" << bool_to_str(is_caching_test) << ";";
|
||||
return result.str();
|
||||
}
|
||||
};
|
||||
@ -673,13 +682,23 @@ TEST_P(grid_sample_gpu_test_FLOAT16_FLOAT16, test) {
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_float_float,
|
||||
grid_sample_gpu_test_float_float,
|
||||
testing::Combine(testing::ValuesIn(getParamsToCheckLayouts<float, float>()),
|
||||
testing::ValuesIn(layout_formats)),
|
||||
testing::ValuesIn(layout_formats),
|
||||
testing::Values(RUN_CACHING_TEST)),
|
||||
grid_sample_gpu_test_float_float::PrintToStringParamName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_FLOAT16_FLOAT16,
|
||||
grid_sample_gpu_test_FLOAT16_FLOAT16,
|
||||
testing::Combine(testing::ValuesIn(getParamsToCheckLogic<FLOAT16, FLOAT16>()),
|
||||
testing::Values(format::bfyx)),
|
||||
testing::Values(format::bfyx),
|
||||
testing::Values(RUN_CACHING_TEST)),
|
||||
grid_sample_gpu_test_FLOAT16_FLOAT16::PrintToStringParamName);
|
||||
|
||||
#ifndef RUN_ALL_MODEL_CACHING_TESTS
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_grid_sample_gpu_test_FLOAT16_FLOAT16_cached,
|
||||
grid_sample_gpu_test_FLOAT16_FLOAT16,
|
||||
testing::Combine(testing::ValuesIn(getNearestParamsOddDimensionsOuterGrids<FLOAT16, FLOAT16>()),
|
||||
testing::Values(format::bfyx),
|
||||
testing::Values(true)),
|
||||
grid_sample_gpu_test_FLOAT16_FLOAT16::PrintToStringParamName);
|
||||
#endif
|
||||
} // namespace
|
||||
|
@ -73,24 +73,7 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test)
|
||||
input_primitive_maps, output_primitive_maps, back_edges, 8)
|
||||
);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input", input_mem);
|
||||
network->set_input_data("trip_count", trip_count_mem);
|
||||
@ -191,25 +174,7 @@ void test_loop_gpu_basic_concat(bool is_caching_test)
|
||||
input_primitive_maps, output_primitive_maps, back_edges, trip_count)
|
||||
);
|
||||
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, topology);
|
||||
}
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input_mem);
|
||||
network->set_input_data("trip_count", trip_count_mem);
|
||||
network->set_input_data("initial_condition", initial_condition_mem);
|
||||
@ -349,25 +314,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test)
|
||||
/////////////////////////////////
|
||||
// network execution
|
||||
/////////////////////////////////
|
||||
cldnn::network::ptr network;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, main_topology);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, engine);
|
||||
network = std::make_shared<cldnn::network>(ib, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
network = std::make_shared<cldnn::network>(engine, main_topology);
|
||||
}
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, main_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input_mem);
|
||||
network->set_input_data("trip_count", trip_count_mem);
|
||||
network->set_input_data("initial_condition", initial_condition_mem);
|
||||
|
@ -11,7 +11,8 @@
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
TEST(lrn_fp32_gpu, basic) {
|
||||
template <typename T>
|
||||
void test_fp32_basic(bool is_caching_test) {
|
||||
// input : 1x16x1x1
|
||||
// Output : 1x16x1x1
|
||||
auto& engine = get_test_engine();
|
||||
@ -22,11 +23,9 @@ TEST(lrn_fp32_gpu, basic) {
|
||||
const size_t x = 1;
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
|
||||
std::vector<float> inputVals(b * f * y * x);
|
||||
std::generate(inputVals.begin(), inputVals.end(), []() {
|
||||
static float n = 0;
|
||||
return n++;
|
||||
});
|
||||
std::vector<T> inputVals(b * f * y * x);
|
||||
T n = 0;
|
||||
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
|
||||
|
||||
set_values(input, inputVals);
|
||||
|
||||
@ -38,11 +37,11 @@ TEST(lrn_fp32_gpu, basic) {
|
||||
float beta = 1.f;
|
||||
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("lrn").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
@ -60,7 +59,12 @@ TEST(lrn_fp32_gpu, basic) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(lrn_fp32_gpu, basic2) {
|
||||
TEST(lrn_fp32_gpu, basic) {
|
||||
test_fp32_basic<float>(false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void test_fp32_basic2(bool is_caching_test) {
|
||||
// input : 1x16x1x1
|
||||
// Output : 1x16x1x1
|
||||
auto& engine = get_test_engine();
|
||||
@ -71,11 +75,9 @@ TEST(lrn_fp32_gpu, basic2) {
|
||||
const size_t x = 1;
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
|
||||
std::vector<float> inputVals(b * f * y * x);
|
||||
std::generate(inputVals.begin(), inputVals.end(), []() {
|
||||
static float n = 0;
|
||||
return n++;
|
||||
});
|
||||
std::vector<T> inputVals(b * f * y * x);
|
||||
T n = 0;
|
||||
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
|
||||
|
||||
set_values(input, inputVals);
|
||||
|
||||
@ -87,11 +89,11 @@ TEST(lrn_fp32_gpu, basic2) {
|
||||
float beta = 1.f;
|
||||
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("lrn").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
@ -109,7 +111,12 @@ TEST(lrn_fp32_gpu, basic2) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(lrn_fp16_gpu, basic1) {
|
||||
TEST(lrn_fp32_gpu, basic2) {
|
||||
test_fp32_basic2<float>(false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void test_fp16_basic1(bool is_caching_test) {
|
||||
// input : 1x16x1x1
|
||||
// Output : 1x16x1x1
|
||||
auto& engine = get_test_engine();
|
||||
@ -120,11 +127,9 @@ TEST(lrn_fp16_gpu, basic1) {
|
||||
const size_t x = 1;
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f16, format::b_fs_yx_fsv16, { b, f, x, y } });
|
||||
std::vector<half_t> inputVals(b * f * y * x);
|
||||
std::generate(inputVals.begin(), inputVals.end(), []() {
|
||||
static float n = 0;
|
||||
return half_t(n++);
|
||||
});
|
||||
std::vector<T> inputVals(b * f * y * x);
|
||||
float n = 0;
|
||||
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return T(n++); });
|
||||
|
||||
set_values(input, inputVals);
|
||||
|
||||
@ -136,11 +141,11 @@ TEST(lrn_fp16_gpu, basic1) {
|
||||
float beta = 1.f;
|
||||
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("lrn").get_memory();
|
||||
cldnn::mem_lock<uint16_t> output_ptr(output, get_test_stream());
|
||||
@ -158,7 +163,12 @@ TEST(lrn_fp16_gpu, basic1) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(lrn_fp32_gpu, basic3) {
|
||||
TEST(lrn_fp16_gpu, basic1) {
|
||||
test_fp16_basic1<half_t>(false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void test_fp32_basic3(bool is_caching_test) {
|
||||
// input : 2x16x4x4
|
||||
// Output : 2x16x4x4
|
||||
auto& engine = get_test_engine();
|
||||
@ -169,11 +179,9 @@ TEST(lrn_fp32_gpu, basic3) {
|
||||
const size_t x = 4;
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::b_fs_yx_fsv16, { b, f, x, y } });
|
||||
std::vector<float> inputVals(b * f * y * x);
|
||||
std::generate(inputVals.begin(), inputVals.end(), []() {
|
||||
static float n = 0;
|
||||
return n++;
|
||||
});
|
||||
std::vector<T> inputVals(b * f * y * x);
|
||||
T n = 0;
|
||||
std::generate(inputVals.begin(), inputVals.end(), [n]() mutable { return n++; });
|
||||
|
||||
set_values(input, inputVals);
|
||||
|
||||
@ -185,11 +193,11 @@ TEST(lrn_fp32_gpu, basic3) {
|
||||
float beta = 0.75f;
|
||||
topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("lrn").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
@ -249,3 +257,24 @@ TEST(lrn_fp32_gpu, basic3) {
|
||||
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i])) << i;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(lrn_fp32_gpu, basic3) {
|
||||
test_fp32_basic3<float>(false);
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST(lrn_fp32_gpu, basic_cached) {
|
||||
test_fp32_basic<float>(true);
|
||||
}
|
||||
|
||||
TEST(lrn_fp32_gpu, basic2_cached) {
|
||||
test_fp32_basic2<float>(true);
|
||||
}
|
||||
|
||||
TEST(lrn_fp16_gpu, basic1_cached) {
|
||||
test_fp16_basic1<half_t>(true);
|
||||
}
|
||||
#endif
|
||||
TEST(lrn_fp32_gpu, basic3_cached) {
|
||||
test_fp32_basic3<float>(true);
|
||||
}
|
||||
|
@ -191,7 +191,7 @@ void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell,
|
||||
|
||||
template<typename T>
|
||||
void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
|
||||
bool hasBias = true, bool hasHidden = true) {
|
||||
bool hasBias, bool hasHidden, bool is_caching_test = false) {
|
||||
int min_random = -2, max_random = 2;
|
||||
|
||||
VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
|
||||
@ -244,13 +244,13 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
|
||||
|
||||
topology.add(lstm_gemm("lstm_gemm", input_info("input"), "weights", "recurrent", hasBias ? "biases" : "", hasHidden ? "hidden" : ""));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
if (hasHidden) {
|
||||
network.set_input_data("hidden", hidden);
|
||||
network->set_input_data("hidden", hidden);
|
||||
}
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
@ -264,8 +264,8 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
|
||||
|
||||
template<typename T>
|
||||
void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_size,
|
||||
int /* input_size */, int hidden_size, bool hasCell = true,
|
||||
T clip_threshold = (T)0.f, bool input_forget = false) {
|
||||
int /* input_size */, int hidden_size, bool hasCell,
|
||||
T clip_threshold, bool input_forget, bool is_caching_test = false) {
|
||||
// tempGEMM = [ 1, direction, batch, 4 * hidden_size ] input
|
||||
// cell = [ 1, direction, batch, hidden_size ] optional
|
||||
// output = [ 2, direction, batch, hidden_size ] output concat[hidden, cell]
|
||||
@ -307,13 +307,13 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_
|
||||
}
|
||||
topology.add(lstm_elt("lstm_elt", input_info("tempGEMM"), hasCell ? "cell" : "", clip_threshold, input_forget));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("tempGEMM", tempGEMM);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("tempGEMM", tempGEMM);
|
||||
if (hasCell) {
|
||||
network.set_input_data("cell", cell);
|
||||
network->set_input_data("cell", cell);
|
||||
}
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
@ -390,7 +390,7 @@ void generate_lstm_topology(topology& t, memory::ptr input, memory::ptr hidden,
|
||||
|
||||
template<typename T>
|
||||
void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
|
||||
bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true) {
|
||||
bool hasBias, bool hasInitialHidden, bool hasInitialCell, bool is_caching_test = false) {
|
||||
std::cout << "Input Size = " << input_size << " Hidden Size = " << hidden_size << " Sequence Len = " << sequence_len << " Batch Size = " << batch_size << std::endl;
|
||||
int min_random = -2, max_random = 2;
|
||||
VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
|
||||
@ -430,11 +430,11 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
|
||||
generate_lstm_topology(topology, input, hidden, cell, weights, recurrent, biases, sequence_len,
|
||||
hasBias, hasInitialHidden, hasInitialCell);
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
if (hasInitialHidden) network.set_input_data("hidden", hidden);
|
||||
if (hasInitialCell) network.set_input_data("cell", cell);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
if (hasInitialHidden) network->set_input_data("hidden", hidden);
|
||||
if (hasInitialCell) network->set_input_data("cell", cell);
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
|
||||
@ -457,8 +457,8 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
|
||||
// -------------------------------------------------------
|
||||
template<typename T>
|
||||
void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
|
||||
bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true,
|
||||
T clip_threshold = 0, bool input_forget = false) {
|
||||
bool hasBias, bool hasInitialHidden, bool hasInitialCell,
|
||||
T clip_threshold, bool input_forget, bool is_caching_test = false) {
|
||||
std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
|
||||
<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
|
||||
int min_random = -2, max_random = 2;
|
||||
@ -596,14 +596,14 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
|
||||
prev_lstm_id = lstm_id;
|
||||
}
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
for (int i = 0; i < layers; ++i) {
|
||||
std::string sid = get_string_id(i);
|
||||
if (hasInitialHidden) network.set_input_data("hidden" + sid, hidden[i]);
|
||||
if (hasInitialCell) network.set_input_data("cell" + sid, cell[i]);
|
||||
if (hasInitialHidden) network->set_input_data("hidden" + sid, hidden[i]);
|
||||
if (hasInitialCell) network->set_input_data("cell" + sid, cell[i]);
|
||||
}
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
{
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
|
||||
@ -637,7 +637,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
|
||||
|
||||
// -------------------------------------------------------
|
||||
template<typename T>
|
||||
void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions) {
|
||||
void lstm_gpu_output_test(const lstm_output_selection& output_selection, int directions, bool is_caching_test = false) {
|
||||
int layers = 1;
|
||||
int sequence_len = 4;
|
||||
int batch_size = 3;
|
||||
@ -722,12 +722,12 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir
|
||||
topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0}));
|
||||
}
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("hidden", hidden);
|
||||
network.set_input_data("cell", cell);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("hidden", hidden);
|
||||
network->set_input_data("cell", cell);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
uint32_t ref_num_output_primitives = 1; // Output will return atleast 1 primitive
|
||||
|
||||
if (emit_last_cell) {
|
||||
@ -798,7 +798,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir
|
||||
|
||||
// -------------------------------------------------------
|
||||
template<typename T>
|
||||
void lstm_gpu_format_test(const cldnn::format& format, int directions) {
|
||||
void lstm_gpu_format_test(const cldnn::format& format, int directions, bool is_caching_test = false) {
|
||||
int layers = 1;
|
||||
int sequence_len = 6;
|
||||
int batch_size = 3;
|
||||
@ -886,13 +886,14 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
|
||||
topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0}));
|
||||
}
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
std::map<primitive_id, network_output> outputs;
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("hidden", hidden);
|
||||
network.set_input_data("cell", cell);
|
||||
outputs = network.execute();
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("hidden", hidden);
|
||||
network->set_input_data("cell", cell);
|
||||
outputs = network->execute();
|
||||
|
||||
uint32_t ref_num_output_primitives = 1; // Output will return atleast 1 primitive
|
||||
|
||||
@ -979,7 +980,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions) {
|
||||
|
||||
// -------------------------------------------------------
|
||||
template<typename T>
|
||||
void lstm_gpu_users_test() {
|
||||
void lstm_gpu_users_test(bool is_caching_test = false) {
|
||||
int sequence_len = 2;
|
||||
int batch_size = 1;
|
||||
int input_size = 1;
|
||||
@ -1052,13 +1053,14 @@ void lstm_gpu_users_test() {
|
||||
std::vector<input_info> output_ids_offsets { input_info("lstm"), input_info("hidden") };
|
||||
topology.add(concatenation("concatenation", output_ids_offsets, 1));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
std::map<primitive_id, network_output> outputs;
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("hidden", hidden);
|
||||
network.set_input_data("cell", cell);
|
||||
outputs = network.execute();
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("hidden", hidden);
|
||||
network->set_input_data("cell", cell);
|
||||
outputs = network->execute();
|
||||
|
||||
// check if the number of returned primitives match the expected number of output primitives
|
||||
ASSERT_EQ(size_t(1), outputs.size());
|
||||
@ -1081,9 +1083,9 @@ void lstm_gpu_users_test() {
|
||||
template<typename T>
|
||||
void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int direction,
|
||||
int batch_size, int input_size, int hidden_size,
|
||||
bool has_bias = true, bool has_initial_hidden = true,
|
||||
bool has_initial_cell = true, float clip_threshold = 0,
|
||||
bool input_forget = false)
|
||||
bool has_bias, bool has_initial_hidden,
|
||||
bool has_initial_cell, float clip_threshold,
|
||||
bool input_forget, bool is_caching_test = false)
|
||||
{
|
||||
std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
|
||||
<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
|
||||
@ -1210,14 +1212,14 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
|
||||
prev_node_id = output_crop_id;
|
||||
}
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
for (int i = 0; i < layers; ++i) {
|
||||
std::string sid = get_string_id(i);
|
||||
if (has_initial_hidden) network.set_input_data("hidden" + sid, hidden[i]);
|
||||
if (has_initial_cell) network.set_input_data("cell" + sid, cell[i]);
|
||||
if (has_initial_hidden) network->set_input_data("hidden" + sid, hidden[i]);
|
||||
if (has_initial_cell) network->set_input_data("cell" + sid, cell[i]);
|
||||
}
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
{
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
size_t output_size = outputs.begin()->second.get_memory()->size() / sizeof(T);
|
||||
@ -1254,7 +1256,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio
|
||||
template<typename T>
|
||||
void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
|
||||
int directions, size_t layers, size_t chains, int sequence_len,
|
||||
const lstm_output_selection& output_selection)
|
||||
const lstm_output_selection& output_selection, bool is_caching_test = false)
|
||||
{
|
||||
int min_random = -2, max_random = 2;
|
||||
bool has_bias = false;
|
||||
@ -1553,15 +1555,15 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
|
||||
}
|
||||
|
||||
// Creating network out of the above designed topology
|
||||
cldnn::network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
for (size_t layer = 0; layer < layers; layer++) {
|
||||
std::string sid = get_string_id(layer);
|
||||
if (has_initial_hidden) network.set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
|
||||
if (has_initial_cell) network.set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
|
||||
if (has_initial_hidden) network->set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
|
||||
if (has_initial_cell) network->set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
|
||||
}
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
for (auto itr = outputs.begin(); itr != outputs.end(); itr++)
|
||||
{
|
||||
auto output_layout = itr->second.get_memory()->get_layout();
|
||||
@ -1666,23 +1668,23 @@ TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32) {
|
||||
|
||||
// LSTM ELT Tests
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f32) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f);
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f, false);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f32) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, 1);
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f32) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, 1);
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_f32) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true);
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, false);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f32) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false);
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false, 0.f, false);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_f32) {
|
||||
@ -1720,35 +1722,35 @@ TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32) {
|
||||
// generic_lstm_gpu_test paramters:
|
||||
// layers, sequence, dir, batch, input, hidden, bias, initial_h, initial_cell, threshold, coupled_input_forget
|
||||
TEST(lstm_gpu, generic_lstm_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_cell_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_cell_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32) {
|
||||
@ -1765,46 +1767,46 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32) {
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32) {
|
||||
default_offset_type = lstm_weights_order::ifoz;
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
default_offset_type = lstm_weights_order::iofz;
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_canonical_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true);
|
||||
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// bidirectional support
|
||||
TEST(lstm_gpu, generic_lstm_bi_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false);
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true);
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// multi-layer support
|
||||
TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32) {
|
||||
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_seq_f32) {
|
||||
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_bi_f32) {
|
||||
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32) {
|
||||
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// optional outputs support
|
||||
@ -1864,11 +1866,11 @@ TEST(lstm_gpu, lstm_users_f32) {
|
||||
|
||||
// Test for LSTM with concatenated input
|
||||
TEST(lstm_gpu, generic_lstm_concatenated_input) {
|
||||
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true);
|
||||
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer) {
|
||||
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true);
|
||||
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// test for LSTM with chain and stack (multilayer)
|
||||
@ -1938,55 +1940,55 @@ TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16) {
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f16) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f);
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f, false);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, 1);
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, 1);
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_f16) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true);
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, false);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false);
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false, 0.f, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_cell_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_cell_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16) {
|
||||
@ -2003,37 +2005,396 @@ TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16) {
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16) {
|
||||
default_offset_type = lstm_weights_order::ifoz;
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
default_offset_type = lstm_weights_order::iofz;
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_canonical_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// bidirectional support
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// multi-layer support
|
||||
TEST(lstm_gpu, generic_lstm_stacked_seq_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_bi_f16) {
|
||||
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true);
|
||||
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true, 0, false);
|
||||
}
|
||||
|
||||
// TODO: Add tests for the following:
|
||||
// integration testing using multi-layer and chained LSTMs
|
||||
// LSTMs single input
|
||||
// optional activation list
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_test_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(5, 1, 1, 1024, 1024, true, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_bias_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 256, 2, false, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, true, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32_cached) {
|
||||
generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, false, false, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f32_cached) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f32_cached) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, true, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f32_cached) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.5f, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_f32_cached) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.f, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f32_cached) {
|
||||
generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, false, 0.f, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, true, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_biasf32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, true, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_hidden_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, false, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, false, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_cell_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, true, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_cell_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, true, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_hidden_cell_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, true, false, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_custom_gpu, generic_lstm_custom_no_bias_hidden_cell_f32_cached) {
|
||||
generic_lstm_custom_gpu_test<float>(3, 1, 3, 3, 2, false, false, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, false, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_cell_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, true, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_cell_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, true, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, true, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 5, 4, 3, false, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_input_forget_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f32_cached) {
|
||||
default_offset_type = lstm_weights_order::ifoz;
|
||||
generic_lstm_gpu_test<float>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
default_offset_type = lstm_weights_order::iofz;
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_canonical_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 1, 1, 1, 1, 1, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, false, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(1, 7, 2, 2, 3, 4, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_no_seq_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(4, 1, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_seq_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(4, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_bi_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32_cached) {
|
||||
generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_sequence_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::sequence, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_hidden_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::hidden, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_hidden_cell_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_sequence_cell_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_sequence_bi_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::sequence, 2, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_hidden_bi_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::hidden, 2, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_hidden_cell_bi_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::hidden_cell, 2, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, output_test_sequence_cell_bi_f32_cached) {
|
||||
lstm_gpu_output_test<float>(lstm_output_selection::sequence_cell, 2, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, lstm_gpu_format_bfyx_f32_cached) {
|
||||
lstm_gpu_format_test<float>(cldnn::format::bfyx, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, lstm_gpu_format_bfyx_bi_f32_cached) {
|
||||
lstm_gpu_format_test<float>(cldnn::format::bfyx, 2, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, lstm_gpu_format_fyxb_f32_cached) {
|
||||
lstm_gpu_format_test<float>(cldnn::format::fyxb, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, lstm_gpu_format_fyxb_bi_f32_cached) {
|
||||
lstm_gpu_format_test<float>(cldnn::format::fyxb, 2, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, lstm_users_f32_cached) {
|
||||
lstm_gpu_users_test<float>(true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_concatenated_input_cached) {
|
||||
lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer_cached) {
|
||||
lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_chained_unidirectional_f32_cached) {
|
||||
lstm_gpu_chain_test<float>(1, 2, 4, 1, 1, 2, 1, lstm_output_selection::sequence_cell, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32_cached) {
|
||||
lstm_gpu_chain_test<float>(1, 2, 4, 2, 1, 1, 1, lstm_output_selection::sequence_cell, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32_cached) {
|
||||
lstm_gpu_chain_test<float>(2, 2, 4, 2, 1, 2, 5, lstm_output_selection::sequence_cell, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32_cached) {
|
||||
lstm_gpu_chain_test<float>(2, 2, 4, 2, 4, 2, 5, lstm_output_selection::sequence_cell, true);
|
||||
}
|
||||
|
||||
// FP16 Half precision tests
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f16_cached) {
|
||||
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f16_cached) {
|
||||
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f16_cached) {
|
||||
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16_cached) {
|
||||
generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, false, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_f16_cached) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16_cached) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, true, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16_cached) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, true, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_test_f16_cached) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16_cached) {
|
||||
generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false, 0.f, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_cell_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_cell_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_gpu, generic_lstm_clip_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_input_forget_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1, true);
|
||||
}
|
||||
|
||||
TEST(DISABLED_lstm_gpu, generic_lstm_clip_input_forget_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16_cached) {
|
||||
default_offset_type = lstm_weights_order::ifoz;
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
default_offset_type = lstm_weights_order::iofz;
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_canonical_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
// bidirectional support
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true, 0, false, true);
|
||||
}
|
||||
|
||||
TEST(lstm_gpu, generic_lstm_stacked_seq_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
#endif
|
||||
TEST(lstm_gpu, generic_lstm_stacked_bi_f16_cached) {
|
||||
generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true, 0, false, true);
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ struct matrix_nms_test_inputs {
|
||||
std::string test_name;
|
||||
};
|
||||
|
||||
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type>;
|
||||
using matrix_nms_test_params = std::tuple<matrix_nms_test_inputs, format::type, bool>;
|
||||
|
||||
template <class T>
|
||||
struct matrix_nms_gpu_test : public testing::TestWithParam<matrix_nms_test_params> {
|
||||
@ -56,7 +56,8 @@ public:
|
||||
void test() {
|
||||
format::type blocked_format;
|
||||
matrix_nms_test_inputs test_inputs;
|
||||
std::tie(test_inputs, blocked_format) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
|
||||
bool is_caching_test;
|
||||
std::tie(test_inputs, blocked_format, is_caching_test) = testing::TestWithParam<matrix_nms_test_params>::GetParam();
|
||||
|
||||
const auto data_type = type_to_data_type<T>::value;
|
||||
const auto plain_format = format::bfyx;
|
||||
@ -106,11 +107,12 @@ public:
|
||||
attrs));
|
||||
topology.add(reorder("matrix_nms", input_info("reordered_matrix_nms"), plain_format, data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("boxes", boxes);
|
||||
network.set_input_data("scores", scores);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs = network.execute();
|
||||
network->set_input_data("boxes", boxes);
|
||||
network->set_input_data("scores", scores);
|
||||
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("matrix_nms").get_memory();
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
@ -124,14 +126,16 @@ public:
|
||||
ASSERT_NEAR(expected_output[i], output_ptr[i], THRESHOLD);
|
||||
}
|
||||
|
||||
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
|
||||
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
|
||||
ASSERT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
|
||||
}
|
||||
if (!is_caching_test) {
|
||||
ASSERT_EQ(test_inputs.expected_selected_boxes.size(), selected_boxes_ptr.size());
|
||||
for (size_t i = 0; i < test_inputs.expected_selected_boxes.size(); ++i) {
|
||||
ASSERT_EQ(test_inputs.expected_selected_boxes[i], selected_boxes_ptr[i]);
|
||||
}
|
||||
|
||||
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
|
||||
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
|
||||
ASSERT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
|
||||
ASSERT_EQ(test_inputs.expected_valid_outputs.size(), valid_outputs_ptr.size());
|
||||
for (size_t i = 0; i < test_inputs.expected_valid_outputs.size(); ++i) {
|
||||
ASSERT_EQ(test_inputs.expected_valid_outputs[i], valid_outputs_ptr[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -158,7 +162,8 @@ public:
|
||||
result << "Normalized=" << bool_to_str(test_inputs.normalized) << "_";
|
||||
result << "sort_result_type=" << sort_res_type_str << "_";
|
||||
result << "decay_function=" << decay_function_str << "_";
|
||||
result << "Format=" << fmt_to_str(std::get<1>(info.param));
|
||||
result << "Format=" << fmt_to_str(std::get<1>(info.param)) << "_";
|
||||
result << "Cached=" << bool_to_str(std::get<2>(info.param));
|
||||
|
||||
if (!test_inputs.test_name.empty())
|
||||
result << "_TN=" << test_inputs.test_name;
|
||||
@ -630,6 +635,12 @@ const std::vector<format::type> layout_formats = {format::bfyx,
|
||||
format::bs_fs_yx_bsv32_fsv32,
|
||||
format::bs_fs_yx_bsv32_fsv16};
|
||||
|
||||
// Values fed to the boolean (third) element of matrix_nms_test_params, which the test body
// unpacks as is_caching_test. With RUN_ALL_MODEL_CACHING_TESTS defined every case runs in both
// modes; otherwise only the non-cached mode is generated here.
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
const std::vector<bool> run_caching_test = {false, true};
|
||||
#else
|
||||
const std::vector<bool> run_caching_test = {false};
|
||||
#endif
|
||||
|
||||
#define INSTANTIATE_MATRIX_NMS_TEST_SUITE(input_type, func) \
|
||||
using matrix_nms_gpu_test_##input_type##func = matrix_nms_gpu_test<input_type>; \
|
||||
TEST_P(matrix_nms_gpu_test_##input_type##func, test) { \
|
||||
@ -637,7 +648,8 @@ const std::vector<format::type> layout_formats = {format::bfyx,
|
||||
} \
|
||||
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_##input_type##func, \
|
||||
matrix_nms_gpu_test_##input_type##func, \
|
||||
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats)), \
|
||||
testing::Combine(testing::Values(func()), testing::ValuesIn(layout_formats), \
|
||||
testing::ValuesIn(run_caching_test)), \
|
||||
matrix_nms_gpu_test_##input_type##func::PrintToStringParamName);
|
||||
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(float, get_matrix_nms_smoke_inputs)
|
||||
@ -668,6 +680,14 @@ INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_top_k_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_single_box_inputs)
|
||||
INSTANTIATE_MATRIX_NMS_TEST_SUITE(FLOAT16, get_matrix_nms_no_output_inputs)
|
||||
|
||||
// When RUN_ALL_MODEL_CACHING_TESTS is NOT defined, run_caching_test contains only `false`, so
// this adds one cached instantiation explicitly: the FLOAT16 smoke-inputs fixture over every
// entry of layout_formats, with the caching flag fixed to true.
#ifndef RUN_ALL_MODEL_CACHING_TESTS
|
||||
INSTANTIATE_TEST_SUITE_P(matrix_nms_test_FLOAT16get_matrix_nms_smoke_inputs_cached,
|
||||
matrix_nms_gpu_test_FLOAT16get_matrix_nms_smoke_inputs,
|
||||
testing::Combine(testing::Values(get_matrix_nms_smoke_inputs()), testing::ValuesIn(layout_formats),
|
||||
testing::Values(true)),
|
||||
matrix_nms_gpu_test_FLOAT16get_matrix_nms_smoke_inputs::PrintToStringParamName);
|
||||
#endif
|
||||
|
||||
#undef INSTANTIATE_MATRIX_NMS_TEST_SUITE
|
||||
|
||||
} // namespace
|
||||
|
@ -57,6 +57,8 @@ struct MulticlassNmsParams {
|
||||
std::vector<T> expected_selected_outputs;
|
||||
std::vector<T_IND> expected_selected_indices;
|
||||
std::vector<T_IND> expected_selected_num;
|
||||
|
||||
bool is_caching_test;
|
||||
};
|
||||
|
||||
template<typename T, typename T_IND>
|
||||
@ -170,15 +172,16 @@ public:
|
||||
topology.add(reorder("multiclass_nms", input_info("multiclass_nms_reordered"), plain_format, data_type));
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(false));
|
||||
network network(engine, topology, config);
|
||||
|
||||
network.set_input_data("input_boxes", input_boxes);
|
||||
network.set_input_data("input_scores", input_scores);
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), param.is_caching_test);
|
||||
|
||||
network->set_input_data("input_boxes", input_boxes);
|
||||
network->set_input_data("input_scores", input_scores);
|
||||
if (param.has_roisnum) {
|
||||
network.set_input_data("input_roisnum", input_roisnum);
|
||||
network->set_input_data("input_roisnum", input_roisnum);
|
||||
}
|
||||
|
||||
const auto outputs = network.execute();
|
||||
const auto outputs = network->execute();
|
||||
|
||||
const auto output_boxes = outputs.at("multiclass_nms").get_memory();
|
||||
const cldnn::mem_lock<T> output_boxes_ptr(output_boxes, get_test_stream());
|
||||
@ -209,13 +212,17 @@ public:
|
||||
get_test_stream());
|
||||
ASSERT_EQ(output_selected_num_ptr.size(), param.num_batches) << "format=" << fmt_to_str(target_format);
|
||||
|
||||
for (size_t i = 0; i < param.num_batches; ++i) {
|
||||
ASSERT_EQ(param.expected_selected_num[i], output_selected_num_ptr[i])
|
||||
<< "format=" << fmt_to_str(target_format) << " i=" << i;
|
||||
if (!param.is_caching_test) {
|
||||
for (size_t i = 0; i < param.num_batches; ++i) {
|
||||
ASSERT_EQ(param.expected_selected_num[i], output_selected_num_ptr[i])
|
||||
<< "format=" << fmt_to_str(target_format) << " i=" << i;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t box = 0; box < dim; ++box) {
|
||||
ASSERT_EQ(param.expected_selected_indices[box], output_selected_indices_ptr[box]) << "box=" << box;
|
||||
if (!param.is_caching_test) {
|
||||
ASSERT_EQ(param.expected_selected_indices[box], output_selected_indices_ptr[box]) << "box=" << box;
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < 6; ++j) {
|
||||
const auto idx = box * 6 + j;
|
||||
@ -266,7 +273,7 @@ TEST_P(multiclass_nms_test_blocked, basic) {
|
||||
}
|
||||
|
||||
template<typename T, typename T_IND>
|
||||
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams(bool is_caching_test = false) {
|
||||
std::vector<MulticlassNmsParams<T, T_IND>> params = {
|
||||
{"by_score",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -292,7 +299,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
0.00, 0.90, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{3, 0, 0, 3, -1, -1},
|
||||
std::vector<T_IND>{4}},
|
||||
std::vector<T_IND>{4},
|
||||
is_caching_test},
|
||||
|
||||
{"by_class_id",
|
||||
cldnn::multiclass_nms::sort_result_type::classid,
|
||||
@ -306,7 +314,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
1.00, 0.95, 0.00, 0.00, 1.00, 1.00, 1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{3, 0, 0, 3, -1, -1},
|
||||
std::vector<T_IND>{4}},
|
||||
std::vector<T_IND>{4},
|
||||
is_caching_test},
|
||||
|
||||
{"three_inputs",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -346,7 +355,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{1, 0, -1, -1, -1, -1,
|
||||
2, 3, -1, -1, -1, -1},
|
||||
std::vector<T_IND>{2, 2}},
|
||||
std::vector<T_IND>{2, 2},
|
||||
is_caching_test},
|
||||
|
||||
{"across_batches_by_score",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -384,7 +394,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{3, 0, 6, 0, -1, -1, 3, 9, 4, 5, -1, -1},
|
||||
std::vector<T_IND>{4, 4}},
|
||||
std::vector<T_IND>{4, 4},
|
||||
is_caching_test},
|
||||
|
||||
{"across_batches_by_class_id",
|
||||
cldnn::multiclass_nms::sort_result_type::classid,
|
||||
@ -423,7 +434,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{3, 0, 0, 3, -1, -1, 4, 5, 6, 9, -1, -1},
|
||||
std::vector<T_IND>{4, 4}},
|
||||
std::vector<T_IND>{4, 4},
|
||||
is_caching_test},
|
||||
|
||||
{"normalized",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -449,7 +461,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 1.00,
|
||||
1.00, 0.00, 0.00, 0.00, 0.75, 0.00, 0.10, 1.00, 1.10}),
|
||||
std::vector<T_IND>{3, 0, 1},
|
||||
std::vector<T_IND>{3}},
|
||||
std::vector<T_IND>{3},
|
||||
is_caching_test},
|
||||
|
||||
{"identical_boxes",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -477,7 +490,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{0, -1, -1},
|
||||
std::vector<T_IND>{1}},
|
||||
std::vector<T_IND>{1},
|
||||
is_caching_test},
|
||||
|
||||
{"limit_output_size",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -501,7 +515,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
std::vector<T_IND>{},
|
||||
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
|
||||
std::vector<T_IND>{3, 0},
|
||||
std::vector<T_IND>{2}},
|
||||
std::vector<T_IND>{2},
|
||||
is_caching_test},
|
||||
|
||||
{"single_box",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -525,7 +540,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
|
||||
getValues<T>({0.00, 0.90, 0.00, 0.00, 1.00, 1.00}),
|
||||
std::vector<T_IND>{0},
|
||||
std::vector<T_IND>{1}},
|
||||
std::vector<T_IND>{1},
|
||||
is_caching_test},
|
||||
|
||||
{"iou_threshold",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -551,7 +567,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
getValues<T>({0.00, 0.95, 0.00, 10.00, 1.00, 11.00, 0.00, 0.90, 0.00,
|
||||
0.00, 1.00, 1.00, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{3, 0, -1},
|
||||
std::vector<T_IND>{2}},
|
||||
std::vector<T_IND>{2},
|
||||
is_caching_test},
|
||||
|
||||
{"iou_and_score_thresholds",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -577,7 +594,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
getValues<T>({0.00, 0.96, 0.00, 10.00, 1.00, 11.00, -1.0, -1.0, -1.0,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{3, -1, -1},
|
||||
std::vector<T_IND>{1}},
|
||||
std::vector<T_IND>{1},
|
||||
is_caching_test},
|
||||
|
||||
{"no_output",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -607,7 +625,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{-1, -1, -1, -1, -1, -1},
|
||||
std::vector<T_IND>{0}},
|
||||
std::vector<T_IND>{0},
|
||||
is_caching_test},
|
||||
|
||||
{"background_class",
|
||||
cldnn::multiclass_nms::sort_result_type::classid,
|
||||
@ -648,7 +667,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
1.00, 0.80, 0.00, 10.00, 1.00, 11.00,
|
||||
-1.0, -1.0, -1.0, -1.0, -1.0, -1.0}),
|
||||
std::vector<T_IND>{0, 3, -1, 6, 9, -1},
|
||||
std::vector<T_IND>{2, 2}},
|
||||
std::vector<T_IND>{2, 2},
|
||||
is_caching_test},
|
||||
|
||||
{"keep_top_k",
|
||||
cldnn::multiclass_nms::sort_result_type::classid,
|
||||
@ -681,7 +701,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
1.00, 0.95, 0.00, 0.00, 1.00, 1.00,
|
||||
1.00, 0.80, 0.00, 10.00, 1.00, 11.00}),
|
||||
std::vector<T_IND>{3, 0, 0, 4, 6, 9},
|
||||
std::vector<T_IND>{3, 3}},
|
||||
std::vector<T_IND>{3, 3},
|
||||
is_caching_test},
|
||||
|
||||
{"normalized_by_classid",
|
||||
cldnn::multiclass_nms::sort_result_type::classid,
|
||||
@ -735,14 +756,15 @@ std::vector<MulticlassNmsParams<T, T_IND>> getMulticlassNmsParams() {
|
||||
-1, -1, -1, -1, -1, -1,
|
||||
2, 4, 5, 6, 9, 11,
|
||||
-1, -1, -1, -1, -1, -1},
|
||||
std::vector<T_IND>{6, 6}},
|
||||
std::vector<T_IND>{6, 6},
|
||||
is_caching_test},
|
||||
};
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
template<typename T, typename T_IND>
|
||||
std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout() {
|
||||
std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout(bool is_caching_test = false) {
|
||||
MulticlassNmsParams<T, T_IND> param = {
|
||||
"blocked_format_three_inputs",
|
||||
cldnn::multiclass_nms::sort_result_type::score,
|
||||
@ -798,7 +820,8 @@ std::vector<MulticlassNmsParams<T, T_IND>> getParamsForBlockedLayout() {
|
||||
std::vector<T_IND>{1, 0, -1, -1, -1, -1,
|
||||
2, 3, -1, -1, -1, -1},
|
||||
std::vector<T_IND>{2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
is_caching_test
|
||||
};
|
||||
|
||||
const auto indices_size = param.num_batches * param.num_boxes;
|
||||
@ -829,8 +852,24 @@ INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test,
|
||||
PrintToStringParamName());
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_blocked,
|
||||
multiclass_nms_test_f32_i32,
|
||||
multiclass_nms_test_blocked,
|
||||
::testing::ValuesIn(getParamsForBlockedLayout<float, int32_t>()),
|
||||
PrintToStringParamName());
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
|
||||
multiclass_nms_test_f32_i32,
|
||||
::testing::ValuesIn(getMulticlassNmsParams<float, int32_t>(true)),
|
||||
PrintToStringParamName());
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_cached,
|
||||
multiclass_nms_test_f16_i64,
|
||||
::testing::ValuesIn(getMulticlassNmsParams<half_t, int64_t>(true)),
|
||||
PrintToStringParamName());
|
||||
#endif
|
||||
// Cached variant of the blocked-layout case. It sits after the #endif, so it is instantiated
// regardless of RUN_ALL_MODEL_CACHING_TESTS; `true` sets is_caching_test on the parameter
// built by getParamsForBlockedLayout.
INSTANTIATE_TEST_SUITE_P(multiclass_nms_gpu_test_blocked_cached,
|
||||
multiclass_nms_test_blocked,
|
||||
::testing::ValuesIn(getParamsForBlockedLayout<float, int32_t>(true)),
|
||||
PrintToStringParamName());
|
||||
|
||||
}; // namespace
|
||||
|
@ -104,112 +104,80 @@ void mvn_compute_mean_within_channels(cldnn::memory::ptr output, bool normalize_
|
||||
}
|
||||
}
|
||||
|
||||
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) {
|
||||
template <typename T>
|
||||
void test_mvn_test_across_channels_outside_sqrt_bfyx(bool is_caching_test) {
|
||||
// mvn across channels test (f16 when T is FLOAT16, f32 otherwise) with normalize_variance set to false
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}});
|
||||
cldnn::data_types input_data_type = std::is_same<T, FLOAT16>::value ? data_types::f16 : data_types::f32;
|
||||
|
||||
tests::set_random_values<float>(input, true, 8, 100);
|
||||
auto input = engine.allocate_memory({input_data_type, format::bfyx, {7, 10, 17, 13}});
|
||||
|
||||
tests::set_random_values<T>(input, true, 8, 100);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "mvn");
|
||||
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
mvn_compute_mean_across_channels<float>(output, false);
|
||||
mvn_compute_mean_across_channels<T>(output, false);
|
||||
}
|
||||
|
||||
// Non-cached float instantiation of the shared helper (is_caching_test = false).
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx) {
|
||||
test_mvn_test_across_channels_outside_sqrt_bfyx<float>(false);
|
||||
}
|
||||
|
||||
// Shared body for the "across channels, inside sqrt" mvn tests.
//
// Builds a two-primitive topology (input_layout "input" -> mvn "mvn"), obtains a network through
// get_network(), feeds it a randomly filled bfyx input and passes the single "mvn" output to
// mvn_compute_mean_across_channels<T>(output, false).
//
// T               - element type; FLOAT16 selects data_types::f16, any other type data_types::f32.
// is_caching_test - forwarded unchanged to get_network().
//                   NOTE(review): presumably makes the helper save and reload the network before
//                   execution; get_network() is not visible in this chunk - confirm.
template <typename T>
|
||||
void test_mvn_test_across_channels_inside_sqrt_bfyx(bool is_caching_test) {
|
||||
// mvn across channels test (f16 or f32, selected by T) with normalize_variance set to false
|
||||
using namespace cldnn;
|
||||
using namespace tests;
|
||||
|
|
||||
auto& engine = get_test_engine();
|
||||
|
|
||||
cldnn::data_types input_data_type = std::is_same<T, FLOAT16>::value ? data_types::f16 : data_types::f32;
|
||||
|
|
||||
auto input = engine.allocate_memory({input_data_type, format::bfyx, {7, 10, 17, 13}});
|
||||
|
|
||||
tests::set_random_values<T>(input, true, 8, 100);
|
||||
|
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
// The fifth argument is `true` here and `false` in the outside_sqrt helper.
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
|
||||
|
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
|
||||
network->set_input_data("input", input);
|
||||
|
|
||||
auto outputs = network->execute();
|
||||
// Exactly one output is expected, and it must be the mvn primitive itself.
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "mvn");
|
||||
|
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
mvn_compute_mean_across_channels<T>(output, false);
|
||||
}
|
||||
|
||||
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx) {
|
||||
// mvn across channels fp32 test with normalize_variance set to false
|
||||
using namespace cldnn;
|
||||
using namespace tests;
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {7, 10, 17, 13}});
|
||||
|
||||
tests::set_random_values<float>(input, true, 8, 100);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "mvn");
|
||||
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
mvn_compute_mean_across_channels<float>(output, false);
|
||||
test_mvn_test_across_channels_inside_sqrt_bfyx<float>(false);
|
||||
}
|
||||
|
||||
TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_outside_sqrt_fp16) {
|
||||
// mvn across channels fp16 test with normalize_variance set to false
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}});
|
||||
|
||||
tests::set_random_values<FLOAT16>(input, true, 8, 100);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "mvn");
|
||||
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
mvn_compute_mean_across_channels<FLOAT16>(output, false);
|
||||
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_fp16) {
|
||||
test_mvn_test_across_channels_outside_sqrt_bfyx<FLOAT16>(false);
|
||||
}
|
||||
|
||||
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16) {
|
||||
// mvn across channels fp16 test with normalize_variance set to false
|
||||
using namespace cldnn;
|
||||
using namespace tests;
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = engine.allocate_memory({data_types::f16, format::bfyx, {7, 10, 17, 13}});
|
||||
|
||||
tests::set_random_values<FLOAT16>(input, true, 8, 100);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "mvn");
|
||||
|
||||
auto output = outputs.begin()->second.get_memory();
|
||||
mvn_compute_mean_across_channels<FLOAT16>(output, false);
|
||||
test_mvn_test_across_channels_inside_sqrt_bfyx<FLOAT16>(false);
|
||||
}
|
||||
|
||||
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance) {
|
||||
@ -666,7 +634,7 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
}
|
||||
}
|
||||
|
||||
void execute(const mvn_basic_test_params& params, engine& eng) {
|
||||
void execute(const mvn_basic_test_params& params, engine& eng, bool is_caching_test) {
|
||||
auto& size = params.input_size;
|
||||
auto& output_pad = params.output_pad;
|
||||
|
||||
@ -695,11 +663,11 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
prim.output_paddings = {output_pad};
|
||||
topo.add(prim);
|
||||
|
||||
network net(eng, topo);
|
||||
cldnn::network::ptr net = get_network(eng, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
net.set_input_data("input", input);
|
||||
net->set_input_data("input", input);
|
||||
|
||||
auto outputs = net.execute();
|
||||
auto outputs = net->execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "mvn");
|
||||
|
||||
@ -710,7 +678,7 @@ struct mvn_random_test : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
|
||||
TEST_P(mvn_random_test, random) {
|
||||
auto& engine = tests::get_test_engine();
|
||||
this->execute(GetParam(), engine);
|
||||
this->execute(GetParam(), engine, false);
|
||||
}
|
||||
|
||||
struct mvn_test_case_generator : std::vector<mvn_basic_test_params> {
|
||||
@ -857,7 +825,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
}
|
||||
}
|
||||
|
||||
void execute(const mvn_basic_test_params& params) {
|
||||
void execute(const mvn_basic_test_params& params, bool is_caching_test) {
|
||||
auto& size = params.input_size;
|
||||
auto& output_pad = params.output_pad;
|
||||
auto& engine = get_test_engine();
|
||||
@ -888,10 +856,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn"}));
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} }));
|
||||
|
||||
network net(engine, topo, config);
|
||||
net.set_input_data("input", input);
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs = net.execute();
|
||||
net->set_input_data("input", input);
|
||||
|
||||
auto outputs = net->execute();
|
||||
auto output = outputs.at("mvn").get_memory();
|
||||
|
||||
topology topo_opt;
|
||||
@ -904,10 +873,11 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn_opt", "input_to_target_layout"}));
|
||||
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} }));
|
||||
|
||||
network net_opt(engine, topo_opt, config_opt);
|
||||
net_opt.set_input_data("input", input);
|
||||
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs_opt = net_opt.execute();
|
||||
net_opt->set_input_data("input", input);
|
||||
|
||||
auto outputs_opt = net_opt->execute();
|
||||
auto output_opt = outputs_opt.at("mvn_opt").get_memory();
|
||||
|
||||
auto output_dtype = output->get_layout().data_type;
|
||||
@ -933,7 +903,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam<mvn_basic_test_params> {
|
||||
};
|
||||
|
||||
TEST_P(mvn_random_test_bsv32, random) {
|
||||
this->execute(GetParam());
|
||||
this->execute(GetParam(), false);
|
||||
}
|
||||
|
||||
struct mvn_test_case_generator_bsv32 : std::vector<mvn_basic_test_params> {
|
||||
@ -964,3 +934,29 @@ INSTANTIATE_TEST_SUITE_P(mvn_fsv16,
|
||||
mvn_random_test_bsv32,
|
||||
testing::ValuesIn(mvn_test_case_generator_bsv32()
|
||||
.bsv32_tests(format::b_fs_yx_fsv16, data_types::i8)));
|
||||
|
||||
// Cached float variant (is_caching_test = true). Defined before the
// RUN_ALL_MODEL_CACHING_TESTS guard below, so it is always compiled.
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_cached) {
|
||||
test_mvn_test_across_channels_outside_sqrt_bfyx<float>(true);
|
||||
}
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// Cached float variant of the inside_sqrt helper (is_caching_test = true).
// Compiled only when RUN_ALL_MODEL_CACHING_TESTS is defined.
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_cached) {
|
||||
test_mvn_test_across_channels_inside_sqrt_bfyx<float>(true);
|
||||
}
|
||||
|
||||
// Cached FLOAT16 variant of the outside_sqrt helper (is_caching_test = true).
// Compiled only when RUN_ALL_MODEL_CACHING_TESTS is defined.
TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_fp16_cached) {
|
||||
test_mvn_test_across_channels_outside_sqrt_bfyx<FLOAT16>(true);
|
||||
}
|
||||
|
||||
// Cached FLOAT16 variant of the inside_sqrt helper (is_caching_test = true).
// Compiled only when RUN_ALL_MODEL_CACHING_TESTS is defined.
TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_fp16_cached) {
|
||||
test_mvn_test_across_channels_inside_sqrt_bfyx<FLOAT16>(true);
|
||||
}
|
||||
|
||||
// Cached counterpart of mvn_random_test.random: same parameters, but execute() is called with
// is_caching_test = true. Compiled only when RUN_ALL_MODEL_CACHING_TESTS is defined.
TEST_P(mvn_random_test, random_cached) {
|
||||
auto& engine = tests::get_test_engine();
|
||||
this->execute(GetParam(), engine, true);
|
||||
}
|
||||
|
||||
// Cached counterpart of mvn_random_test_bsv32.random: execute() is called with
// is_caching_test = true. Compiled only when RUN_ALL_MODEL_CACHING_TESTS is defined.
TEST_P(mvn_random_test_bsv32, random_cached) {
|
||||
this->execute(GetParam(), true);
|
||||
}
|
||||
#endif
|
||||
|
@ -128,24 +128,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
@ -206,24 +189,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
@ -294,24 +260,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
@ -430,24 +379,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
@ -556,24 +488,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
@ -630,24 +545,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
@ -708,24 +606,7 @@ struct non_max_suppression_basic : public testing::Test {
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network::ptr net;
|
||||
|
||||
if (is_caching_test) {
|
||||
membuf mem_buf;
|
||||
{
|
||||
cldnn::network _network(engine, topo, config);
|
||||
std::ostream out_mem(&mem_buf);
|
||||
BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem);
|
||||
_network.save(ob);
|
||||
}
|
||||
{
|
||||
std::istream in_mem(&mem_buf);
|
||||
BinaryInputBuffer ib = BinaryInputBuffer(in_mem, get_test_engine());
|
||||
net = std::make_shared<cldnn::network>(ib, config, get_test_stream_ptr(), engine);
|
||||
}
|
||||
} else {
|
||||
net = std::make_shared<cldnn::network>(engine, topo, config);
|
||||
}
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto boxes_mem = this->get_boxes_memory(engine);
|
||||
auto scores_mem = this->get_scores_memory(engine);
|
||||
|
@ -54,6 +54,57 @@ struct normalize_basic : public testing::Test {
|
||||
return inputVals;
|
||||
}
|
||||
|
||||
void execute(bool is_caching_test) {
|
||||
// Input : 1x2x3x3
|
||||
// Output : 1x2x3x3
|
||||
auto& engine = get_test_engine();
|
||||
const unsigned b = 1;
|
||||
const unsigned f = 2;
|
||||
const unsigned y = 3;
|
||||
const unsigned x = 3;
|
||||
|
||||
auto input = engine.allocate_memory({this->data_type, format::bfyx, {b, f, y, x}});
|
||||
auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}});
|
||||
|
||||
auto inputVals = this->get_input_values(b, f, y, x);
|
||||
std::vector<float> weightVals(f);
|
||||
for (auto& it : weightVals) {
|
||||
it = 1.f;
|
||||
}
|
||||
|
||||
set_values(input, inputVals);
|
||||
set_values(weights, weightVals);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("Input0", input->get_layout()));
|
||||
topology.add(data("Input1", weights));
|
||||
topology.add(reorder("reordered_Input0", input_info("Input0"), this->format, this->data_type));
|
||||
topology.add(reorder("reordered_Input1", input_info("Input1"), this->format, data_types::f32));
|
||||
topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
|
||||
topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("Input0", input);
|
||||
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("plane_normalize2").get_memory();
|
||||
if (this->data_type == data_types::f16) {
|
||||
cldnn::mem_lock<half_t> output_ptr(output, get_test_stream());
|
||||
auto expected_results = this->get_expected_result();
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
ASSERT_NEAR(expected_results[i], output_ptr[i], 0.001);
|
||||
}
|
||||
} else {
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
auto expected_results = this->get_expected_result();
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const std::vector<output_type> get_expected_result(std::true_type) {
|
||||
static const std::vector<float> result = {0.f,
|
||||
@ -144,52 +195,23 @@ using format_types = testing::Types<normalize_input_types<format::bfyx, float, f
|
||||
TYPED_TEST_SUITE(normalize_basic, format_types);
|
||||
|
||||
TYPED_TEST(normalize_basic, basic) {
|
||||
// Input : 1x2x3x3
|
||||
// Output : 1x2x3x3
|
||||
auto& engine = get_test_engine();
|
||||
const unsigned b = 1;
|
||||
const unsigned f = 2;
|
||||
const unsigned y = 3;
|
||||
const unsigned x = 3;
|
||||
|
||||
auto input = engine.allocate_memory({this->data_type, format::bfyx, {b, f, y, x}});
|
||||
auto weights = engine.allocate_memory({data_types::f32, format::bfyx, {1, f, 1, 1}});
|
||||
|
||||
auto inputVals = this->get_input_values(b, f, y, x);
|
||||
std::vector<float> weightVals(f);
|
||||
for (auto& it : weightVals) {
|
||||
it = 1.f;
|
||||
}
|
||||
|
||||
set_values(input, inputVals);
|
||||
set_values(weights, weightVals);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("Input0", input->get_layout()));
|
||||
topology.add(data("Input1", weights));
|
||||
topology.add(reorder("reordered_Input0", input_info("Input0"), this->format, this->data_type));
|
||||
topology.add(reorder("reordered_Input1", input_info("Input1"), this->format, data_types::f32));
|
||||
topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
|
||||
topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
|
||||
network.set_input_data("Input0", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("plane_normalize2").get_memory();
|
||||
if (this->data_type == data_types::f16) {
|
||||
cldnn::mem_lock<half_t> output_ptr(output, get_test_stream());
|
||||
auto expected_results = this->get_expected_result();
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
ASSERT_NEAR(expected_results[i], output_ptr[i], 0.001);
|
||||
}
|
||||
} else {
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
auto expected_results = this->get_expected_result();
|
||||
for (size_t i = 0; i < expected_results.size(); ++i) {
|
||||
ASSERT_TRUE(are_equal(expected_results[i], output_ptr[i]));
|
||||
}
|
||||
}
|
||||
this->execute(false);
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TYPED_TEST(normalize_basic, basic_cached) {
|
||||
this->execute(true);
|
||||
}
|
||||
#else
|
||||
template <typename NormalizeInput>
|
||||
struct normalize_basic_cached : public normalize_basic<NormalizeInput> {
|
||||
};
|
||||
|
||||
using format_types_cached = testing::Types<normalize_input_types<format::bfyx, float, false>>;
|
||||
|
||||
TYPED_TEST_SUITE(normalize_basic_cached, format_types_cached);
|
||||
|
||||
TYPED_TEST(normalize_basic_cached, basic) {
|
||||
this->execute(true);
|
||||
}
|
||||
#endif
|
||||
|
@ -66,7 +66,7 @@ VVVVF<T> one_hot_cpu(VVVVF<T> &input, uint16_t axis,
|
||||
|
||||
template <typename T>
|
||||
void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, tensor shape,
|
||||
uint16_t one_hot_axis, int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, int output_padding_x = 0) {
|
||||
uint16_t one_hot_axis, int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x, bool is_caching_test) {
|
||||
std::vector<tensor::value_type> output_dims = { shape.batch[0], shape.feature[0],
|
||||
shape.spatial[1], shape.spatial[0] };
|
||||
int32_t one_hot_limit = output_dims[one_hot_axis];
|
||||
@ -84,9 +84,9 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(one_hot("output", input_info("input"), shape, one_hot_axis, one_hot_limit));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "output");
|
||||
|
||||
@ -130,17 +130,33 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
|
||||
}
|
||||
|
||||
TEST(one_hot_gpu_i32, generic) {
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, false);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, false);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, false);
|
||||
generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
TEST(one_hot_gpu_i64, generic) {
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, false);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, false);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, false);
|
||||
generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, false);
|
||||
}
|
||||
|
||||
// Cached (is_caching_test = true) variants of the generic one-hot tests.
//
// Without RUN_ALL_MODEL_CACHING_TESTS only the first int32 case is run; with it,
// all four int32 axes and the whole int64 test are compiled in as well.
// Each preprocessor conditional is kept inside a single scope so that the braces
// of both TEST bodies match regardless of the macro.
TEST(one_hot_gpu_i32, generic_cached) {
    generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, true);
#ifdef RUN_ALL_MODEL_CACHING_TESTS
    generic_one_hot_test_int<int32_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, true);
    generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, true);
    generic_one_hot_test_int<int32_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, true);
#endif
}

#ifdef RUN_ALL_MODEL_CACHING_TESTS
TEST(one_hot_gpu_i64, generic_cached) {
    generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(5, 2, 1, 2), 0, 0, 0, 0, 0, true);
    generic_one_hot_test_int<int64_t>(format::bfyx, 1, 2, 3, 1, tensor(1, 5, 3, 2), 1, 0, 0, 0, 0, true);
    generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 1, 4), 2, 0, 0, 0, 0, true);
    generic_one_hot_test_int<int64_t>(format::bfyx, 2, 2, 1, 1, tensor(2, 2, 4, 1), 3, 0, 0, 0, 0, true);
}
#endif
|
||||
|
||||
TEST(one_hot_gpu_i32, bfzyx_ax4) {
|
||||
|
@ -1627,7 +1627,7 @@ public:
|
||||
template<data_types Data_Type>
|
||||
void run_test(const std::vector<cldnn::tensor::value_type>& sizes, cldnn::format format_fsv,
|
||||
const std::string & permute_opt = "permute_tile_8x8_4x4_fsv",
|
||||
std::vector<uint16_t> permute_order = {});
|
||||
std::vector<uint16_t> permute_order = {}, bool is_caching_test = false);
|
||||
};
|
||||
|
||||
template<>
|
||||
@ -1654,7 +1654,7 @@ void TiledPermuteTest::set_random_values<int8_t>(const cldnn::memory::ptr mem) c
|
||||
|
||||
template<data_types Data_Type>
|
||||
void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& sizes, cldnn::format format_fsv,
|
||||
const std::string & permute_opt, std::vector<uint16_t> permute_order)
|
||||
const std::string & permute_opt, std::vector<uint16_t> permute_order, bool is_caching_test)
|
||||
{
|
||||
// convert half_t to FLOAT16
|
||||
using type_ = typename data_type_to_type<Data_Type>::type;
|
||||
@ -1690,9 +1690,9 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
|
||||
ov::intel_gpu::ImplementationDesc permute_ref = { format_fsv, "permute_ref" };
|
||||
config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_ref} }));
|
||||
|
||||
cldnn::network network_ref(engine, topology_ref, config_ref);
|
||||
network_ref.set_input_data("input", input);
|
||||
auto outputs_ref = network_ref.execute();
|
||||
cldnn::network::ptr network_ref = get_network(engine, topology_ref, config_ref, get_test_stream_ptr(), is_caching_test);
|
||||
network_ref->set_input_data("input", input);
|
||||
auto outputs_ref = network_ref->execute();
|
||||
auto output_ref = outputs_ref.begin()->second.get_memory();
|
||||
cldnn::mem_lock<type> output_ref_ptr(output_ref, get_test_stream());
|
||||
|
||||
@ -1701,9 +1701,9 @@ void TiledPermuteTest::run_test(const std::vector<cldnn::tensor::value_type>& si
|
||||
ov::intel_gpu::ImplementationDesc permute_tile_opt = { format_fsv, permute_opt };
|
||||
config_tile.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_tile_opt} }));
|
||||
|
||||
cldnn::network network_tile(engine, topology_ref, config_tile);
|
||||
network_tile.set_input_data("input", input);
|
||||
auto outputs_tile = network_tile.execute();
|
||||
cldnn::network::ptr network_tile = get_network(engine, topology_ref, config_tile, get_test_stream_ptr(), is_caching_test);
|
||||
network_tile->set_input_data("input", input);
|
||||
auto outputs_tile = network_tile->execute();
|
||||
auto output_tile = outputs_tile.begin()->second.get_memory();
|
||||
cldnn::mem_lock<type> output_tile_ptr(output_tile, get_test_stream());
|
||||
|
||||
@ -1920,3 +1920,59 @@ TEST_P(permute_bfzyx_to_bfyxz, combined) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_bfzyx_to_bfyxz", {0, 1, 3, 4, 2});
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(permute_tile_fsv_4d, f16_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::f16>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_4d, f32_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_4d, i8_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::i8>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_4d, i32_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::i32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_4d, i64_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::i64>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_5d, f16_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::f16>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_5d, f32_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_5d, i8_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::i8>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_tile_fsv_5d, i32_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::i32>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
||||
TEST_P(permute_bfzyx_to_bfyxz, combined_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::f32>(p.sizes, p.format_fsv, "permute_bfzyx_to_bfyxz", {0, 1, 3, 4, 2}, true);
|
||||
}
|
||||
#endif
|
||||
TEST_P(permute_tile_fsv_5d, i64_cached) {
|
||||
auto p = GetParam();
|
||||
run_test<cldnn::data_types::i64>(p.sizes, p.format_fsv, "permute_tile_8x8_4x4_fsv", {}, true);
|
||||
}
|
||||
|
@ -1933,11 +1933,12 @@ public:
|
||||
return "pool";
|
||||
}
|
||||
|
||||
virtual void run_expect(const VVVVVF<output_t>& expected) {
|
||||
virtual void run_expect(const VVVVVF<output_t>& expected, bool is_caching_test) {
|
||||
auto& eng = get_test_engine();
|
||||
auto topo = build_topology(eng);
|
||||
ExecutionConfig config(ov::intel_gpu::optimize_data(true));
|
||||
cldnn::network net(eng, topo, config);
|
||||
|
||||
cldnn::network::ptr net = get_network(eng, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto input_size = tensor(batch(batch_num()), feature(input_features()), spatial(input_x(), input_y(), input_z()));
|
||||
auto input_lay = layout(input_type(),
|
||||
@ -1956,20 +1957,22 @@ public:
|
||||
}
|
||||
set_values(input_mem, input_flat);
|
||||
|
||||
net.set_input_data("input", input_mem);
|
||||
auto result = net.execute();
|
||||
net->set_input_data("input", input_mem);
|
||||
auto result = net->execute();
|
||||
auto out_mem = result.at(output_id()).get_memory();
|
||||
auto out_lay = out_mem->get_layout();
|
||||
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
|
||||
|
||||
std::string kernel;
|
||||
for (auto i : net.get_primitives_info()) {
|
||||
if (i.original_id == "pool") {
|
||||
kernel = i.kernel_id;
|
||||
if (!is_caching_test) {
|
||||
std::string kernel;
|
||||
for (auto i : net->get_primitives_info()) {
|
||||
if (i.original_id == "pool") {
|
||||
kernel = i.kernel_id;
|
||||
}
|
||||
}
|
||||
std::cout << kernel << std::endl;
|
||||
SCOPED_TRACE("\nkernel: " + kernel);
|
||||
}
|
||||
std::cout << kernel << std::endl;
|
||||
SCOPED_TRACE("\nkernel: " + kernel);
|
||||
|
||||
ASSERT_EQ(out_lay.data_type, output_type());
|
||||
ASSERT_EQ(out_lay.batch(), expected.size());
|
||||
@ -2115,10 +2118,10 @@ public:
|
||||
this->set_offsets(o_x, o_y, o_z);
|
||||
}
|
||||
|
||||
void run_random(const pooling_random_test_params& params) {
|
||||
void run_random(const pooling_random_test_params& params, bool is_caching_test) {
|
||||
param_set_up(params);
|
||||
auto reference = calculate_reference();
|
||||
ASSERT_NO_FATAL_FAILURE(this->run_expect(reference));
|
||||
ASSERT_NO_FATAL_FAILURE(this->run_expect(reference, is_caching_test));
|
||||
}
|
||||
};
|
||||
|
||||
@ -2131,22 +2134,22 @@ struct pooling_random_test : public testing::TestWithParam<pooling_random_test_p
|
||||
|
||||
TEST_P(pooling_random_test, max_i8) {
|
||||
auto test_case = max_pooling_i8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test, max_u8) {
|
||||
auto test_case = max_pooling_u8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test, avg_i8) {
|
||||
auto test_case = avg_pooling_i8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test, avg_u8) {
|
||||
auto test_case = avg_pooling_u8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
@ -2243,22 +2246,22 @@ using pooling_random_test_fp16_fp32 = pooling_random_test;
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, avg_fp16) {
|
||||
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::average>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, max_fp16) {
|
||||
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::max>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, avg_fp32) {
|
||||
auto test_case = pooling_random_test_base<float, pooling_mode::average>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, max_fp32) {
|
||||
auto test_case = pooling_random_test_base<float, pooling_mode::max>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam()));
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), false));
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
@ -3249,3 +3252,44 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) {
|
||||
}
|
||||
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(pooling_random_test, max_i8_cached) {
|
||||
auto test_case = max_pooling_i8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test, max_u8_cached) {
|
||||
auto test_case = max_pooling_u8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test, avg_i8_cached) {
|
||||
auto test_case = avg_pooling_i8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test, avg_u8_cached) {
|
||||
auto test_case = avg_pooling_u8_random_test();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, avg_fp16_cached) {
|
||||
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::average>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, max_fp16_cached) {
|
||||
auto test_case = pooling_random_test_base<FLOAT16, pooling_mode::max>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
||||
TEST_P(pooling_random_test_fp16_fp32, avg_fp32_cached) {
|
||||
auto test_case = pooling_random_test_base<float, pooling_mode::average>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
#endif // RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(pooling_random_test_fp16_fp32, max_fp32_cached) {
|
||||
auto test_case = pooling_random_test_base<float, pooling_mode::max>();
|
||||
ASSERT_NO_FATAL_FAILURE(test_case.run_random(GetParam(), true));
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ using prior_box_param = std::tuple<format, // Input and ou
|
||||
template <class InputType, class OutputType>
|
||||
class PriorBoxGPUTest : public ::testing::TestWithParam<prior_box_param<InputType, OutputType>> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
void execute(bool is_caching_test) {
|
||||
const auto input_data_type = type_to_data_type<InputType>::value;
|
||||
const auto output_data_type = type_to_data_type<OutputType>::value;
|
||||
const auto plain_format = format::bfyx;
|
||||
@ -92,8 +92,10 @@ public:
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(false));
|
||||
network network(engine, topo, config);
|
||||
const auto outputs = network.execute();
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
const auto outputs = network->execute();
|
||||
const auto output = outputs.at("prior_box").get_memory();
|
||||
|
||||
cldnn::mem_lock<OutputType> output_ptr(output, get_test_stream());
|
||||
@ -107,7 +109,9 @@ public:
|
||||
};
|
||||
|
||||
using prior_box_test_i32_f32 = PriorBoxGPUTest<int32_t, float>;
|
||||
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32) {}
|
||||
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32) {
|
||||
this->execute(false);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
prior_box_test_all_formats,
|
||||
@ -261,4 +265,37 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
|
||||
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1
|
||||
})));
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(prior_box_test_i32_f32, prior_box_test_i32_f32_cached) {
|
||||
this->execute(true);
|
||||
}
|
||||
#else
|
||||
using prior_box_test_i32_f32_cached = PriorBoxGPUTest<int32_t, float>;
|
||||
TEST_P(prior_box_test_i32_f32_cached, prior_box_test_i32_f32) {
|
||||
this->execute(true);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
prior_box_test_four_variances,
|
||||
prior_box_test_i32_f32_cached,
|
||||
testing::Combine(
|
||||
testing::Values(format::bfyx),
|
||||
testing::Values(std::vector<int32_t>{2, 2}),
|
||||
testing::Values(std::vector<int32_t>{10, 10}),
|
||||
testing::Values(
|
||||
prior_box_attributes{{2.0f}, {5.0f}, {1.5f}, {}, {}, {}, false, false, 0.0f, 0.0f, {0.1, 0.2, 0.3, 0.4}, true, true}),
|
||||
testing::Values(std::vector<float>{
|
||||
0.15, 0.15, 0.35, 0.35, 0.0918861, 0.0918861, 0.408114, 0.408114, 0.127526, 0.16835, 0.372474, 0.33165,
|
||||
0.65, 0.15, 0.85, 0.35,
|
||||
0.591886, 0.0918861, 0.908114, 0.408114, 0.627526, 0.16835, 0.872474, 0.33165, 0.15, 0.65, 0.35, 0.85,
|
||||
0.0918861, 0.591886, 0.408114, 0.908114,
|
||||
0.127526, 0.66835, 0.372474, 0.83165, 0.65, 0.65, 0.85, 0.85, 0.591886, 0.591886, 0.908114, 0.908114,
|
||||
0.627526, 0.66835, 0.872474, 0.83165,
|
||||
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
|
||||
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
|
||||
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4,
|
||||
0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4
|
||||
})));
|
||||
#endif
|
||||
} // namespace
|
||||
|
@ -15,7 +15,8 @@ using namespace ::tests;
|
||||
|
||||
//We expect additional reorder to be added in between "weights1" and "reshape1".
|
||||
//This situation should be handled properly by propagate constants optimization phase
|
||||
TEST(propagate_constants, copy_dependecies_from_nodes) {
|
||||
template <typename T>
|
||||
void test_copy_dependecies_from_nodes(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
@ -24,8 +25,8 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
|
||||
auto weights1 = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
|
||||
auto weights2 = engine.allocate_memory({ data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
|
||||
|
||||
set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
|
||||
set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
|
||||
set_values(input, { T(1.1f), T(1.2f), T(1.3f), T(1.4f) });
|
||||
set_values(weights1, { T(2.1f), T(3.1f) });
|
||||
set_values(weights2, { 1.1f, 0.1f });
|
||||
|
||||
topology topology;
|
||||
@ -37,10 +38,10 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
|
||||
topology.add(reorder("reorder1", input_info("reshape1"), layout(data_types::f32, format::byxf, tensor(4))));
|
||||
topology.add(concatenation("concat", { input_info("reorder1"), input_info("weights2") }, 3));
|
||||
topology.add(convolution("conv2", { input_info("reorder2") }, { "concat" }));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
float epsilon = 1e-2f;
|
||||
for (auto& it : outputs) {
|
||||
@ -48,3 +49,11 @@ TEST(propagate_constants, copy_dependecies_from_nodes) {
|
||||
ASSERT_NEAR(7.8f, output[0], epsilon);
|
||||
}
|
||||
}
|
||||
|
||||
// Regular run: the network is requested with is_caching_test = false.
TEST(propagate_constants, copy_dependecies_from_nodes) {
    constexpr bool is_caching_test = false;
    test_copy_dependecies_from_nodes<FLOAT16>(is_caching_test);
}

// Same scenario with is_caching_test = true.
TEST(propagate_constants, copy_dependecies_from_nodes_cached) {
    constexpr bool is_caching_test = true;
    test_copy_dependecies_from_nodes<FLOAT16>(is_caching_test);
}
|
||||
|
@ -15,116 +15,123 @@ template <typename T>
|
||||
struct pyramid_roi_align_typed_test : testing::Test {
|
||||
static const data_types data_type = type_to_data_type<T>::value;
|
||||
using Type = T;
|
||||
|
||||
void execute(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const int rois_num = 3;
|
||||
const int output_size = 2;
|
||||
const int sampling_points = 2;
|
||||
const int starting_level = 2;
|
||||
const int P2_scale = 1;
|
||||
const int P3_scale = 2;
|
||||
const int P4_scale = 4;
|
||||
const int P5_scale = 8;
|
||||
const int P2_size = 8;
|
||||
const int P3_size = P2_size * P2_scale / P3_scale;
|
||||
const int P4_size = P2_size * P2_scale / P4_scale;
|
||||
const int P5_size = P2_size * P2_scale / P5_scale;
|
||||
|
||||
std::vector<Type> rois_data = {
|
||||
Type(0.f), Type(0.f), Type(1.f), Type(1.f),
|
||||
Type(0.f), Type(0.f), Type(0.5f), Type(0.5f),
|
||||
Type(0.5f), Type(0.5f), Type(0.75f), Type(0.75f)
|
||||
};
|
||||
|
||||
std::vector<Type> P2_data = {
|
||||
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
|
||||
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
|
||||
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
|
||||
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
|
||||
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
|
||||
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
|
||||
Type(0.f), Type(1.f), Type(2.f), Type(3.f), Type(4.f), Type(5.f), Type(6.f), Type(7.f),
|
||||
Type(8.f), Type(9.f), Type(10.f), Type(11.f), Type(12.f), Type(13.f), Type(14.f), Type(15.f),
|
||||
};
|
||||
|
||||
std::vector<Type> P3_data = {
|
||||
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
|
||||
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
|
||||
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
|
||||
Type(9.f), Type(13.f), Type(17.f), Type(21.f),
|
||||
};
|
||||
|
||||
std::vector<Type> P4_data = {
|
||||
Type(11.f), Type(19.f),
|
||||
Type(11.f), Type(19.f),
|
||||
};
|
||||
|
||||
std::vector<Type> P5_data = {
|
||||
Type(15.f)
|
||||
};
|
||||
|
||||
auto rois_lay = layout(this->data_type, format::bfyx, tensor(batch(rois_num), feature(4)));
|
||||
auto P2_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P2_size, P2_size));
|
||||
auto P3_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P3_size, P3_size));
|
||||
auto P4_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P4_size, P4_size));
|
||||
auto P5_lay = layout(this->data_type, format::bfyx, tensor(1, 1, P5_size, P5_size));
|
||||
|
||||
auto rois_mem = engine.allocate_memory(rois_lay);
|
||||
auto P2_mem = engine.allocate_memory(P2_lay);
|
||||
auto P3_mem = engine.allocate_memory(P3_lay);
|
||||
auto P4_mem = engine.allocate_memory(P4_lay);
|
||||
auto P5_mem = engine.allocate_memory(P5_lay);
|
||||
|
||||
tests::set_values(rois_mem, rois_data);
|
||||
tests::set_values(P2_mem, P2_data);
|
||||
tests::set_values(P3_mem, P3_data);
|
||||
tests::set_values(P4_mem, P4_data);
|
||||
tests::set_values(P5_mem, P5_data);
|
||||
|
||||
topology topo;
|
||||
topo.add(data("P2", P2_mem));
|
||||
topo.add(data("P3", P3_mem));
|
||||
topo.add(data("P4", P4_mem));
|
||||
topo.add(data("P5", P5_mem));
|
||||
topo.add(input_layout("rois", rois_lay));
|
||||
topo.add(pyramid_roi_align("pyramid",
|
||||
input_info("rois"),
|
||||
input_info("P2"),
|
||||
input_info("P3"),
|
||||
input_info("P4"),
|
||||
input_info("P5"),
|
||||
output_size,
|
||||
sampling_points,
|
||||
{ P2_scale, P3_scale, P4_scale, P5_scale },
|
||||
starting_level));
|
||||
|
||||
cldnn::network::ptr net = get_network(engine, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
net->set_input_data("rois", rois_mem);
|
||||
|
||||
std::vector<float> expected_out = {
|
||||
// RoI 0,0 - 1,1 from P4
|
||||
14.f, 18.f, 14.f, 18.f,
|
||||
// RoI 0,0 - 0.5,0.5 from P3
|
||||
11.25f, 14.25f, 11.25f, 14.25f,
|
||||
// RoI 0.5,0.5 - 0.75,0.75 from P2
|
||||
12.15625f, 13.03125f, 7.40625f, 8.28125f,
|
||||
};
|
||||
|
||||
auto result = net->execute();
|
||||
|
||||
auto out_mem = result.at("pyramid").get_memory();
|
||||
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());
|
||||
|
||||
ASSERT_EQ(expected_out.size(), out_ptr.size());
|
||||
for (size_t i = 0; i < expected_out.size(); ++i) {
|
||||
ASSERT_EQ(expected_out[i], static_cast<float>(out_ptr[i])) << "at i = " << i;
|
||||
}
|
||||
}
|
||||
};
|
||||
// Element types the pyramid_roi_align typed suite is instantiated for
// (one set of tests per entry: float and half_t).
using pyramid_roi_align_types = testing::Types<float, half_t>;
|
||||
|
||||
// Binds the type list above to the pyramid_roi_align_typed_test fixture.
TYPED_TEST_SUITE(pyramid_roi_align_typed_test, pyramid_roi_align_types);
|
||||
|
||||
// Baseline (non-cached) run of the 4-level pyramid ROI-align scenario.
//
// The scenario (inputs, topology, expected values and the comparison loop)
// is implemented once in the fixture's execute(); this test only selects the
// mode. Passing false makes execute() forward is_caching_test = false to
// get_network(). Keeping a second inline copy of the scenario here would run
// it twice and would construct one network without going through
// get_network().
TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels) {
    this->execute(false);
}
|
||||
|
||||
// Same 4-level scenario as smoke_4levels, but execute() is asked to build the
// network with is_caching_test = true (the flag is handed to get_network()).
TYPED_TEST(pyramid_roi_align_typed_test, smoke_4levels_cached) {
    const bool is_caching_test = true;
    this->execute(is_caching_test);
}
|
||||
|
@ -794,7 +794,7 @@ struct quantize_random_test : testing::TestWithParam<quantize_random_test_params
|
||||
}
|
||||
}
|
||||
|
||||
void execute_compare(const quantize_random_test_params& params, bool check_result) {
|
||||
void execute_compare(const quantize_random_test_params& params, bool check_result, bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
|
||||
@ -840,10 +840,11 @@ struct quantize_random_test : testing::TestWithParam<quantize_random_test_params
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"quantize"}));
|
||||
|
||||
network net(engine, topo, config);
|
||||
net.set_input_data("input", input);
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto result = net.execute();
|
||||
net->set_input_data("input", input);
|
||||
|
||||
auto result = net->execute();
|
||||
auto output = result.at("quantize").get_memory();
|
||||
|
||||
auto input_opt = engine.allocate_memory(in_layout);
|
||||
@ -909,7 +910,7 @@ struct quantize_random_test_param_generator : std::vector<quantize_random_test_p
|
||||
|
||||
// Runs the randomized quantize comparison for the current parameter set with
// result checking enabled and is_caching_test = false.
// execute_compare() takes the caching flag as a mandatory third argument, so
// exactly one three-argument call is made here.
TEST_P(quantize_random_test, random) {
    auto param = GetParam();
    execute_compare(param, true, false);
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(quantize_smoke,
|
||||
@ -919,3 +920,23 @@ INSTANTIATE_TEST_SUITE_P(quantize_smoke,
|
||||
.simple_params(data_types::f32, data_types::u8, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, 5)
|
||||
.simple_params(data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv16, 5)
|
||||
));
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// Caching variant of quantize_random_test.random: same parameter set, result
// checking enabled, is_caching_test = true. Compiled only in the
// RUN_ALL_MODEL_CACHING_TESTS branch of the surrounding #ifdef.
TEST_P(quantize_random_test, random_cached) {
    const bool check_result = true;
    const bool is_caching_test = true;
    execute_compare(GetParam(), check_result, is_caching_test);
}
|
||||
#else
|
||||
using quantize_random_test_cached = quantize_random_test;
|
||||
|
||||
TEST_P(quantize_random_test_cached, random) {
|
||||
auto param = GetParam();
|
||||
execute_compare(param, true, true);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(quantize_smoke,
|
||||
quantize_random_test_cached,
|
||||
testing::Values(
|
||||
quantize_random_test_params{ data_types::f32, data_types::u8, {1, 16, 10, 10}, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, 5}
|
||||
));
|
||||
#endif
|
||||
|
@ -30,7 +30,7 @@ struct RandomUniformParams {
|
||||
template<typename T>
|
||||
struct random_uniform_gpu_test : public ::testing::TestWithParam<RandomUniformParams<T> > {
|
||||
public:
|
||||
void test() {
|
||||
void test(bool is_caching_test) {
|
||||
|
||||
auto data_type = type_to_data_type<T>::value;
|
||||
RandomUniformParams<T> params = testing::TestWithParam<RandomUniformParams<T> >::GetParam();
|
||||
@ -56,13 +56,13 @@ public:
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
cldnn::network net{engine, topology, config};
|
||||
cldnn::network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
net.set_input_data("shape", shape);
|
||||
net.set_input_data("min_val", min_val);
|
||||
net.set_input_data("max_val", max_val);
|
||||
net->set_input_data("shape", shape);
|
||||
net->set_input_data("min_val", min_val);
|
||||
net->set_input_data("max_val", max_val);
|
||||
|
||||
auto result = net.execute();
|
||||
auto result = net->execute();
|
||||
|
||||
auto out_mem = result.at("random_uniform").get_memory();
|
||||
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
|
||||
@ -105,20 +105,20 @@ using random_uniform_gpu_test_f32 = random_uniform_gpu_test<float>;
|
||||
using random_uniform_gpu_test_f16 = random_uniform_gpu_test<half_t>;
|
||||
|
||||
// int32 random_uniform check with is_caching_test = false.
// test() requires the caching flag, so it is called exactly once with an
// explicit argument; any fatal failure inside it fails this test.
TEST_P(random_uniform_gpu_test_i32, random_int32) {
    ASSERT_NO_FATAL_FAILURE(test(false));
}
|
||||
|
||||
// int64 random_uniform check with is_caching_test = false.
// test() requires the caching flag, so it is called exactly once with an
// explicit argument; any fatal failure inside it fails this test.
TEST_P(random_uniform_gpu_test_i64, random_int64) {
    ASSERT_NO_FATAL_FAILURE(test(false));
}
|
||||
|
||||
|
||||
// f32 random_uniform check with is_caching_test = false.
// test() requires the caching flag, so it is called exactly once with an
// explicit argument; any fatal failure inside it fails this test.
TEST_P(random_uniform_gpu_test_f32, random_f32) {
    ASSERT_NO_FATAL_FAILURE(test(false));
}
|
||||
|
||||
// f16 random_uniform check with is_caching_test = false.
// test() requires the caching flag, so it is called exactly once with an
// explicit argument; any fatal failure inside it fails this test.
TEST_P(random_uniform_gpu_test_f16, random_f16) {
    ASSERT_NO_FATAL_FAILURE(test(false));
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_int32,
|
||||
@ -186,3 +186,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_random_uniform_f16,
|
||||
}
|
||||
),
|
||||
PrintToStringParamName());
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// int32 random_uniform check with is_caching_test = true.
// Lives inside the RUN_ALL_MODEL_CACHING_TESTS guard.
TEST_P(random_uniform_gpu_test_i32, random_int32_cached) {
    const bool is_caching_test = true;
    ASSERT_NO_FATAL_FAILURE(test(is_caching_test));
}
|
||||
|
||||
// int64 random_uniform check with is_caching_test = true.
// Lives inside the RUN_ALL_MODEL_CACHING_TESTS guard.
TEST_P(random_uniform_gpu_test_i64, random_int64_cached) {
    const bool is_caching_test = true;
    ASSERT_NO_FATAL_FAILURE(test(is_caching_test));
}
|
||||
|
||||
// f32 random_uniform check with is_caching_test = true.
// Lives inside the RUN_ALL_MODEL_CACHING_TESTS guard.
TEST_P(random_uniform_gpu_test_f32, random_f32_cached) {
    const bool is_caching_test = true;
    ASSERT_NO_FATAL_FAILURE(test(is_caching_test));
}
|
||||
#endif
|
||||
// f16 random_uniform check with is_caching_test = true.
// Placed after the #endif, so it is compiled whether or not
// RUN_ALL_MODEL_CACHING_TESTS is defined.
TEST_P(random_uniform_gpu_test_f16, random_f16_cached) {
    const bool is_caching_test = true;
    ASSERT_NO_FATAL_FAILURE(test(is_caching_test));
}
|
||||
|
@ -482,7 +482,7 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
void execute() {
|
||||
void execute(bool is_caching_test) {
|
||||
int input_dim = static_cast<int>(input_format.dimension());
|
||||
cldnn::format layout_format = input_format;
|
||||
|
||||
@ -530,12 +530,11 @@ public:
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name};
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}}));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input_mem);
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input_mem);
|
||||
network->execute();
|
||||
|
||||
network.execute();
|
||||
|
||||
auto out_mem = network.get_output("reduce").get_memory();
|
||||
auto out_mem = network->get_output("reduce").get_memory();
|
||||
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
|
||||
auto out_lay = out_mem->get_layout();
|
||||
|
||||
@ -573,13 +572,13 @@ public:
|
||||
};
|
||||
|
||||
class general_reduce_gpu_i8_i8 : public ReduceTestBase<data_types::i8, data_types::i8> {};
|
||||
TEST_P(general_reduce_gpu_i8_i8, base) { execute(); }
|
||||
TEST_P(general_reduce_gpu_i8_i8, base) { execute(false); }
|
||||
|
||||
class general_reduce_gpu_i8_f32 : public ReduceTestBase<data_types::i8, data_types::f32> {};
|
||||
TEST_P(general_reduce_gpu_i8_f32, base) { execute(); }
|
||||
TEST_P(general_reduce_gpu_i8_f32, base) { execute(false); }
|
||||
|
||||
class general_reduce_gpu_f32_f32 : public ReduceTestBase<data_types::f32, data_types::f32> {};
|
||||
TEST_P(general_reduce_gpu_f32_f32, base) { execute(); }
|
||||
TEST_P(general_reduce_gpu_f32_f32, base) { execute(false); }
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_i8_i8,
|
||||
general_reduce_gpu_i8_i8,
|
||||
@ -770,7 +769,8 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_reduce_gpu_ref_f32_f32,
|
||||
),
|
||||
general_reduce_gpu::PrintToStringParamName);
|
||||
|
||||
TEST(reduce_gpu, common_bfyx) {
|
||||
template <typename T>
|
||||
void test_common_bfyx(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 1, 1, 1}});
|
||||
|
||||
@ -780,26 +780,30 @@ TEST(reduce_gpu, common_bfyx) {
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "reduce");
|
||||
|
||||
auto output = outputs.at("reduce").get_memory();
|
||||
|
||||
std::vector<float> ref_data = {1.0f};
|
||||
std::vector<T> ref_data = {1.0f};
|
||||
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < ref_data.size(); ++i) {
|
||||
ASSERT_TRUE(are_equal(ref_data[i], output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// Non-cached entry point for the shared bfyx reduce scenario, instantiated
// for float.
TEST(reduce_gpu, common_bfyx) {
    const bool is_caching_test = false;
    test_common_bfyx<float>(is_caching_test);
}
|
||||
|
||||
TEST(reduce_gpu, common_bfyx_keepdims) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, {1, 3, 4, 1}});
|
||||
@ -1823,7 +1827,7 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
void execute() {
|
||||
void execute(bool is_caching_test) {
|
||||
|
||||
int input_dim = static_cast<int>(input_format.dimension());
|
||||
cldnn::format layout_format = input_format;
|
||||
@ -1891,12 +1895,12 @@ public:
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name};
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}}));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input_mem);
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input_mem);
|
||||
|
||||
network.execute();
|
||||
network->execute();
|
||||
|
||||
auto out_mem = network.get_output("reduce").get_memory();
|
||||
auto out_mem = network->get_output("reduce").get_memory();
|
||||
cldnn::mem_lock<output_t> out_ptr(out_mem, get_test_stream());
|
||||
auto out_lay = out_mem->get_layout();
|
||||
|
||||
@ -1939,10 +1943,10 @@ public:
|
||||
|
||||
|
||||
class general_reduce_gpu_xy_f32 : public ReduceXYWithBigTensorTestBase<data_types::f32, data_types::f32> {};
|
||||
TEST_P(general_reduce_gpu_xy_f32, base) { execute(); }
|
||||
TEST_P(general_reduce_gpu_xy_f32, base) { execute(false); }
|
||||
|
||||
class general_reduce_gpu_xy_i8 : public ReduceXYWithBigTensorTestBase<data_types::i8, data_types::i8> {};
|
||||
TEST_P(general_reduce_gpu_xy_i8, base) { execute(); }
|
||||
TEST_P(general_reduce_gpu_xy_i8, base) { execute(false); }
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(reduce_gpu_b_fs_yx_fsv16_xy_f32,
|
||||
general_reduce_gpu_xy_f32,
|
||||
@ -2111,7 +2115,7 @@ INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_i8_f32,
|
||||
TestParamType_general_reduce_gpu(17, 3, 1, 1, 14, 11, format::b_fs_yx_fsv16, reduce_mode::mean, {1}, "reduce_gpu_b_fs_yx_fsv16", true, data_types::i8, false, data_types::f32)
|
||||
), general_reduce_gpu::PrintToStringParamName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_f16_f16,
|
||||
INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_f16_f16,
|
||||
onednn_reduce_gpu_f16_f16,
|
||||
::testing::Values(
|
||||
TestParamType_general_reduce_gpu(3, 3, 1, 1, 3, 2, format::b_fs_yx_fsv16, reduce_mode::sum, {3, 2, 1, 0}, "reduce_gpu_b_fs_yx_fsv16", false, data_types::f16, false, data_types::f16),
|
||||
@ -2134,3 +2138,19 @@ INSTANTIATE_TEST_SUITE_P(onednn_reduce_gpu_b_fs_yx_fsv16_i8_f32,
|
||||
TestParamType_general_reduce_gpu(17, 3, 1, 1, 14, 11, format::b_fs_yx_fsv16, reduce_mode::mean, {1}, "reduce_gpu_b_fs_yx_fsv16", true, data_types::f16, false, data_types::f16)
|
||||
), general_reduce_gpu::PrintToStringParamName);
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// i8 -> i8 reduce with is_caching_test = true.
TEST_P(general_reduce_gpu_i8_i8, base_cached) {
    const bool is_caching_test = true;
    execute(is_caching_test);
}
|
||||
|
||||
// i8 -> f32 reduce with is_caching_test = true.
TEST_P(general_reduce_gpu_i8_f32, base_cached) {
    const bool is_caching_test = true;
    execute(is_caching_test);
}
|
||||
|
||||
// f32 -> f32 reduce with is_caching_test = true.
TEST_P(general_reduce_gpu_f32_f32, base_cached) {
    const bool is_caching_test = true;
    execute(is_caching_test);
}
|
||||
|
||||
// Big-tensor XY f32 reduce with is_caching_test = true.
TEST_P(general_reduce_gpu_xy_f32, base_cached) {
    const bool is_caching_test = true;
    execute(is_caching_test);
}
|
||||
|
||||
// Big-tensor XY i8 reduce with is_caching_test = true.
TEST_P(general_reduce_gpu_xy_i8, base_cached) {
    const bool is_caching_test = true;
    execute(is_caching_test);
}
|
||||
#endif // RUN_ALL_MODEL_CACHING_TESTS
|
||||
|
||||
// Caching entry point for the shared bfyx reduce scenario (float).
// Declared after the RUN_ALL_MODEL_CACHING_TESTS block is closed, so it is
// always compiled.
TEST(reduce_gpu, common_bfyx_cached) {
    const bool is_caching_test = true;
    test_common_bfyx<float>(is_caching_test);
}
|
||||
|
@ -165,7 +165,7 @@ struct region_yolo_test_params {
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void runRegionTest(region_yolo_test_params& params) {
|
||||
void runRegionTest(region_yolo_test_params& params, bool is_caching_test = false) {
|
||||
auto& engine = get_test_engine();
|
||||
const tensor kInputTensor(params.tensor[0], params.tensor[1], params.tensor[2], params.tensor[3]);
|
||||
auto inputData = generate_random_1d<T>(params.tensor[0] * params.tensor[1] * params.tensor[2] * params.tensor[3], -1, 1);
|
||||
@ -180,10 +180,11 @@ void runRegionTest(region_yolo_test_params& params) {
|
||||
params.regionNum, static_cast<uint32_t>(params.mask.size()), params.softMax));
|
||||
topology.add(reorder("reorder_post", input_info("region_yolo"), format::bfyx, params.dataType));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("InputData", inputPrim);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs = network.execute();
|
||||
network->set_input_data("InputData", inputPrim);
|
||||
|
||||
auto outputs = network->execute();
|
||||
auto output = outputs.at("reorder_post").get_memory();
|
||||
cldnn::mem_lock<T> outputData(output, get_test_stream());
|
||||
|
||||
@ -239,3 +240,44 @@ TEST(region_yolo_gpu_fp16, byxf_softmax) {
|
||||
region_yolo_test_params params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, true};
|
||||
runRegionTest<FLOAT16>(params);
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// region_yolo, f32 / bfyx, run through runRegionTest with
// is_caching_test = true.
TEST(region_yolo_gpu_fp32, bfyx_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::bfyx, false};
    runRegionTest<float>(yolo_params, is_caching_test);
}
|
||||
|
||||
// region_yolo, f32 / bfyx with the trailing bool parameter set, run through
// runRegionTest with is_caching_test = true.
TEST(region_yolo_gpu_fp32, bfyx_softmax_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::bfyx, true};
    runRegionTest<float>(yolo_params, is_caching_test);
}
|
||||
|
||||
// region_yolo, f32 / byxf, run through runRegionTest with
// is_caching_test = true.
TEST(region_yolo_gpu_fp32, byxf_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::byxf, false};
    runRegionTest<float>(yolo_params, is_caching_test);
}
|
||||
|
||||
// region_yolo, f32 / byxf with the trailing bool parameter set, run through
// runRegionTest with is_caching_test = true.
TEST(region_yolo_gpu_fp32, byxf_softmax_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f32, format::byxf, true};
    runRegionTest<float>(yolo_params, is_caching_test);
}
|
||||
|
||||
// region_yolo, f16 / bfyx, run through runRegionTest with
// is_caching_test = true.
TEST(region_yolo_gpu_fp16, bfyx_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::bfyx, false};
    runRegionTest<FLOAT16>(yolo_params, is_caching_test);
}
|
||||
|
||||
// region_yolo, f16 / bfyx with the trailing bool parameter set, run through
// runRegionTest with is_caching_test = true.
TEST(region_yolo_gpu_fp16, bfyx_softmax_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::bfyx, true};
    runRegionTest<FLOAT16>(yolo_params, is_caching_test);
}
|
||||
|
||||
// region_yolo, f16 / byxf, run through runRegionTest with
// is_caching_test = true.
TEST(region_yolo_gpu_fp16, byxf_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, false};
    runRegionTest<FLOAT16>(yolo_params, is_caching_test);
}
|
||||
#endif // RUN_ALL_MODEL_CACHING_TESTS
|
||||
// region_yolo, f16 / byxf with the trailing bool parameter set, run through
// runRegionTest with is_caching_test = true. Uses the same parameter values
// as region_yolo_gpu_fp16.byxf_softmax. Declared after the
// RUN_ALL_MODEL_CACHING_TESTS block is closed, so it is always compiled.
TEST(region_yolo_gpu_fp16, byxf_softmax_cached) {
    const bool is_caching_test = true;
    region_yolo_test_params yolo_params{{ 1, 33, 52, 52 }, { 0, 1, 2 }, 4, 6, 3, data_types::f16, format::byxf, true};
    runRegionTest<FLOAT16>(yolo_params, is_caching_test);
}
|
||||
|
@ -14,7 +14,8 @@ using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
using namespace testing;
|
||||
|
||||
TEST(removing_output_node, multiple_outputs) {
|
||||
template <typename T>
|
||||
void test_multiple_outputs(bool is_caching_test) {
|
||||
// Tests split with crop implementation
|
||||
// _ strided_slice(bfyx)
|
||||
// |
|
||||
@ -58,19 +59,19 @@ TEST(removing_output_node, multiple_outputs) {
|
||||
topology.add(data("input4", strides));
|
||||
topology.add(strided_slice("strided_slice", input_info("shuffle_channels"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {6, 1, 1, 1}));
|
||||
|
||||
std::vector<float> input_vec = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
|
||||
std::vector<float> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f };
|
||||
std::vector<T> input_vec = { 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
|
||||
std::vector<T> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f };
|
||||
set_values(input, input_vec);
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "shuffle_channels", "reshape", "strided_slice" }));
|
||||
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("reshape").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
ASSERT_TRUE(output->get_layout().get_tensor() == after_reshape);
|
||||
|
||||
@ -80,7 +81,7 @@ TEST(removing_output_node, multiple_outputs) {
|
||||
// checking the output node has the same name after output node deleting due to StridedSlice optimization
|
||||
ASSERT_TRUE(outputs.find("strided_slice") != outputs.end());
|
||||
auto output2 = outputs.at("strided_slice").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr2(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr2(output, get_test_stream());
|
||||
|
||||
ASSERT_TRUE(output2->get_layout().get_tensor() == after_strided_slice);
|
||||
|
||||
@ -88,7 +89,12 @@ TEST(removing_output_node, multiple_outputs) {
|
||||
ASSERT_EQ(output_ptr2[i], out_vec[i]);
|
||||
}
|
||||
|
||||
TEST(removing_output_node, output_node_optimization) {
|
||||
// Non-cached entry point for the shared multiple-outputs scenario (float).
TEST(removing_output_node, multiple_outputs) {
    const bool is_caching_test = false;
    test_multiple_outputs<float>(is_caching_test);
}
|
||||
|
||||
template <typename T>
|
||||
void test_output_node_optimization(bool is_caching_test) {
|
||||
// Filter : 2x3
|
||||
// Stride : 2x1
|
||||
// Input : 4x5
|
||||
@ -115,7 +121,7 @@ TEST(removing_output_node, output_node_optimization) {
|
||||
|
||||
set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f });
|
||||
set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f });
|
||||
VVF<float> output_vec = {
|
||||
VVF<T> output_vec = {
|
||||
{ 20.0f, 27.0f, 38.0f },
|
||||
{ 17.0f, 19.0f, 19.0f } };
|
||||
|
||||
@ -125,17 +131,17 @@ TEST(removing_output_node, output_node_optimization) {
|
||||
topology.add(convolution("conv", input_info("input"), { "weights" }, { 2, 1 }));
|
||||
topology.add(activation("relu", input_info("conv"), activation_func::relu));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
|
||||
// checking the output node has the same name after output node deleting due to ReLU optimization
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "relu");
|
||||
|
||||
auto output_memory = outputs.at("relu").get_memory();
|
||||
auto output_layout = output_memory->get_layout();
|
||||
cldnn::mem_lock<float> output_ptr(output_memory, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output_memory, get_test_stream());
|
||||
|
||||
int y_size = output_layout.spatial(1);
|
||||
int x_size = output_layout.spatial(0);
|
||||
@ -152,3 +158,16 @@ TEST(removing_output_node, output_node_optimization) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Non-cached entry point for the shared output-node-optimization scenario
// (float).
TEST(removing_output_node, output_node_optimization) {
    const bool is_caching_test = false;
    test_output_node_optimization<float>(is_caching_test);
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// Caching entry point for the shared multiple-outputs scenario (float).
// Lives inside the RUN_ALL_MODEL_CACHING_TESTS guard.
TEST(removing_output_node, multiple_outputs_cached) {
    const bool is_caching_test = true;
    test_multiple_outputs<float>(is_caching_test);
}
|
||||
#endif
|
||||
// Caching entry point for the shared output-node-optimization scenario
// (float). Placed after the #endif, so it is always compiled.
TEST(removing_output_node, output_node_optimization_cached) {
    const bool is_caching_test = true;
    test_output_node_optimization<float>(is_caching_test);
}
|
||||
|
@ -41,7 +41,8 @@ static void compare_result(std::map<cldnn::primitive_id, cldnn::network_output>
|
||||
static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
|
||||
const data_types input_data_type, const data_types output_data_type,
|
||||
cldnn::format input_format, cldnn::format output_format,
|
||||
int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in = 0, int32_t w_in = 0) {
|
||||
int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in, int32_t w_in,
|
||||
bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
tensor ts;
|
||||
@ -87,10 +88,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
|
||||
ov::intel_gpu::ImplementationDesc reorder_ref = { output_format, "reorder_data" };
|
||||
config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_ref} }));
|
||||
|
||||
network network_ref(engine, topology, config_ref);
|
||||
network_ref.set_input_data("input", input);
|
||||
cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs_ref = network_ref.execute();
|
||||
network_ref->set_input_data("input", input);
|
||||
|
||||
auto outputs_ref = network_ref->execute();
|
||||
cldnn::event::ptr e1 = outputs_ref.at("reorder").get_event();
|
||||
e1->wait();
|
||||
|
||||
@ -99,10 +101,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
|
||||
ov::intel_gpu::ImplementationDesc reorder_optimized = { output_format, kernel_name };
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_optimized} }));
|
||||
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs = network.execute();
|
||||
network->set_input_data("input", input);
|
||||
|
||||
auto outputs = network->execute();
|
||||
cldnn::event::ptr e2 = outputs.at("reorder").get_event();
|
||||
e2->wait();
|
||||
|
||||
@ -123,124 +126,124 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name,
|
||||
|
||||
// Compares the "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" kernel against the
// reference reorder for f32 -> f32 across several shapes and output formats.
// compare_bfyx2blocked_with_ref() has no default arguments, so every call
// spells out z_in, w_in and is_caching_test (false here: non-cached run).
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_f32) {
    // b_fs_yx_fsv32 -> bfyx
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3, 0, 0, false);
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3, 0, 0, false);
    // b_fs_zyx_fsv32 -> bfzyx
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8, 0, false);
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4, 0, false);
    // incremental dims
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3, 0, 0, false);
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4, 0, 0, false);
    compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3, 0, false);
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" for b_fs_yx_fsv32 -> bfyx reorders
// whose input and output data types differ: f32 -> {u8, i64, f16} and
// i32 -> {i8, i64, f16}.
// NOTE(review): every case is listed twice, once with the short argument list
// and once with a trailing `0, 0, false`. This looks like the before/after lines
// of a diff rendered without +/- markers (the `|` / `||||` lines are table
// residue, not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_different_datatype) {
|
||||
// f32 -> other types
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, false);
|
||||
// i32 -> other types
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" for f32 -> f32 reorders from
// fsv16-blocked input formats (b_fs_yx_fsv16, b_fs_zyx_fsv16) to bfyx, bfzyx
// and bfwzyx, one call per shape.
// NOTE(review): every case is listed twice, once with the short argument list
// and once with extra trailing arguments ending in `false`. This looks like the
// before/after lines of a diff rendered without +/- markers (the `|` / `||||`
// lines are table residue, not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_f32) {
|
||||
// u-net
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388, 0, 0, false);
|
||||
// b_fs_yx_fsv16 -> bfyx
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3, 0, 0, false);
|
||||
// b_fs_zyx_fsv16 -> bfzyx
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17, 0, false);
|
||||
// incremental dims
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" for b_fs_yx_fsv16 -> bfyx reorders
// whose input and output data types differ: f32 -> {u8, i8, i32, i64, f16} and
// i32 -> {u8, i8, i64, f16, f32}. All cases use the same numeric shape
// arguments (2, 32, 16 + 7, 2).
// NOTE(review): every case is listed twice, once with the short argument list
// and once with a trailing `0, 0, false`. This looks like the before/after lines
// of a diff rendered without +/- markers (the `|` / `||||` lines are table
// residue, not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_different_datatype) {
|
||||
// f32 -> other types
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
// i32 -> other types
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::u8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32, 16 + 7, 2, 0, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_bfyx_to_blocked_format" for f32 -> f32 reorders from plain
// bfyx / bfzyx input to single-blocked output formats: b_fs_yx_fsv4,
// b_fs_yx_fsv16, b_fs_yx_fsv32, fs_b_yx_fsv32, b_fs_zyx_fsv16, b_fs_zyx_fsv32.
// NOTE(review): every case is listed twice, once with the short argument list
// and once with extra trailing arguments ending in `false`. This looks like the
// before/after lines of a diff rendered without +/- markers (the `|` / `||||`
// lines are table residue, not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_f32) {
|
||||
// bfyx_to_b_fs_yx_fsv4
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 4, 32, 16, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv4, 3, 32 + 2, 32 + 3, 4, 0, 0, false);
|
||||
// bfyx_to_b_fs_yx_fsv16
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 2, 48, 8, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
|
||||
// bfyx to b_fs_yx_fsv32
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 2, 64, 64, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::b_fs_yx_fsv32, 4, 32 + 6, 96 - 4, 2, 0, 0, false);
|
||||
// bfyx to fs_b_yx_fsv32
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 2, 64, 8, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::fs_b_yx_fsv32, 3, 64 + 5, 8 + 7, 2, 0, 0, false);
|
||||
// bfzyx to b_fs_zyx_fsv16
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 2, 48, 8, 4, 4, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2, 0, false);
|
||||
// bfzyx to b_fs_zyx_fsv32
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 2, 64, 8, 4, 4, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7, 2, 2, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_bfyx_to_blocked_format" for f32 -> f32 reorders from bfyx /
// bfzyx to the double-blocked formats bs_fs_yx_bsv16_fsv16 and
// bs_fs_zyx_bsv16_fsv16.
// The trailing per-line tags (no / b / f / x / b-f-x) match which of the first
// three numeric arguments carries a `+ n` offset in that call; "no" marks the
// call without any offset.
// NOTE(review): every case is listed twice, once with the short argument list
// and once with extra trailing arguments ending in `false`. This looks like the
// before/after lines of a diff rendered without +/- markers (the `|` / `||||`
// lines are table residue, not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32) {
|
||||
// bfyx to double blocked format (bs_fs_yx_bsv16_fsv16)
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4); // no
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4); // b
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4); // f
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4); // x
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4); // b-f-x
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 8, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48, 16, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48 + 5, 16, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32, 48, 48 + 3, 4, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfyx, format::bs_fs_yx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 0, 0, false);
|
||||
// bfzyx to double blocked format (bs_fs_zyx_bsv16_fsv16)
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16); // no
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2); // b
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3); // f
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4); // x
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2); // b-f-x
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 8, 4, 16, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48, 16, 4, 2, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48 + 5, 16, 4, 3, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32, 48, 48 + 3, 4, 4, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_bfyx_to_blocked_format" for f32 -> f32 reorders from bfzyx to
// bs_fs_zyx_bsv16_fsv32, including calls where every numeric shape argument is 1.
// NOTE(review): every case is listed twice, once with five numeric arguments
// and once with a trailing `0, false`. This looks like the before/after lines of
// a diff rendered without +/- markers (the `|` / `||||` lines are table residue,
// not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv16_fsv32) {
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 3, 16, 4, 5, 7, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 1, 1, 1, 1, 1, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48, 16, 4, 2, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 1, 1, 1, 1, 1, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48 + 5, 16, 4, 3, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32, 48, 48 + 3, 4, 4, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv16_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_bfyx_to_blocked_format" for f32 -> f32 reorders from bfzyx to
// bs_fs_zyx_bsv32_fsv16, including a call where every numeric shape argument is 1.
// NOTE(review): every case is listed twice, once with five numeric arguments
// and once with a trailing `0, false`. This looks like the before/after lines of
// a diff rendered without +/- markers (the `|` / `||||` lines are table residue,
// not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv16) {
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 1, 1, 1, 1, 1, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48, 16, 4, 2, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48 + 5, 16, 4, 3, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32, 48, 48 + 3, 4, 4, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_bfyx_to_blocked_format" for f32 -> f32 reorders from bfzyx to
// bs_fs_zyx_bsv32_fsv32, including a call where every numeric shape argument is 1.
// NOTE(review): every case is listed twice, once with five numeric arguments
// and once with a trailing `0, false`. This looks like the before/after lines of
// a diff rendered without +/- markers (the `|` / `||||` lines are table residue,
// not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv32) {
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 1, 1, 1, 1, 1, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48, 16, 4, 2, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48 + 5, 16, 4, 3, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32, 48, 48 + 3, 4, 4, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f32, format::bfzyx, format::bs_fs_zyx_bsv32_fsv32, 32 + 2, 48 + 3, 16 + 1, 4, 2, 0, false);
|
||||
}
|
||||
|
||||
// Calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_bfyx_to_blocked_format" for bfyx -> b_fs_yx_fsv16 reorders with
// differing data types: f32 -> f16, i8 -> f32 and i64 -> f32. All cases use the
// same numeric shape arguments (3, 32 + 4, 16 + 7, 2).
// NOTE(review): every case is listed twice, once with the short argument list
// and once with a trailing `0, 0, false`. This looks like the before/after lines
// of a diff rendered without +/- markers (the `|` / `||||` lines are table
// residue, not C++) -- confirm which set is live.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_different_datatype) {
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::f32, data_types::f16, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i8, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format", data_types::i64, data_types::f32, format::bfyx, format::b_fs_yx_fsv16, 3, 32 + 4, 16 + 7, 2, 0, 0, false);
|
||||
}
|
||||
|
||||
TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) {
|
||||
@ -2575,12 +2578,14 @@ public:
|
||||
static const int max_random = 200;
|
||||
std::vector<primitive_id> executed_prims;
|
||||
|
||||
// Builds the input memory from get_input_layout(p), creates a network for
// topology_test / config, binds that memory to the "input" primitive and stores
// the network's get_executed_primitive_ids() result in executed_prims.
// NOTE(review): two variants are interleaved below -- a stack-allocated
// `network` under the one-argument signature, and a get_network(...) pointer
// under the signature that adds is_caching_test. This looks like the
// before/after lines of a diff rendered without +/- markers (the `|` / `||||`
// lines are table residue, not C++) -- confirm which variant is live.
// NOTE(review): no explicit execute() call on the network is visible in this
// span; verify against the real file.
void execute(T& p) {
|
||||
void execute(T& p, bool is_caching_test) {
|
||||
auto input_prim = this->get_mem(get_input_layout(p));
|
||||
network network_test(this->engine, this->topology_test, this->config);
|
||||
network_test.set_input_data("input", input_prim);
|
||||
|
||||
executed_prims = network_test.get_executed_primitive_ids();
|
||||
cldnn::network::ptr network_test = get_network(this->engine, this->topology_test, this->config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network_test->set_input_data("input", input_prim);
|
||||
|
||||
executed_prims = network_test->get_executed_primitive_ids();
|
||||
}
|
||||
|
||||
bool check_optimized_out(T& p, primitive_id target_id) {
|
||||
@ -2659,7 +2664,7 @@ TEST_P(testing_removal_reorder, removal_reorder_1d_along_f) {
|
||||
reorder("reorder_bfyx", input_info("add_bias1"), p.default_format, data_types::f16)
|
||||
);
|
||||
|
||||
execute(p);
|
||||
execute(p, false);
|
||||
|
||||
ASSERT_EQ(check_optimized_out(p, "reorder_bias1"), true);
|
||||
}
|
||||
@ -2682,7 +2687,7 @@ TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input) {
|
||||
reorder("reorder_output", input_info("resample"), p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
execute(p);
|
||||
execute(p, false);
|
||||
|
||||
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
|
||||
}
|
||||
@ -2713,7 +2718,7 @@ TEST_P(testing_removal_reorder, removal_no_padded_reorder) {
|
||||
|
||||
setup_with_build_ops(config);
|
||||
|
||||
execute(p);
|
||||
execute(p, false);
|
||||
|
||||
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), true);
|
||||
}
|
||||
@ -2743,7 +2748,7 @@ TEST_P(testing_removal_reorder, removal_padded_reorder) {
|
||||
|
||||
setup_with_build_ops(config);
|
||||
|
||||
execute(p);
|
||||
execute(p, false);
|
||||
|
||||
ASSERT_EQ(check_optimized_out(p, "reorder_conv"), false);
|
||||
}
|
||||
@ -2810,3 +2815,166 @@ TEST(reorder_onednn_gpu, basic_convert_int8) {
|
||||
}
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
// "_cached" variant: calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" for f32 -> f32 reorders from
// fsv32-blocked input formats to bfyx, bfzyx and bfwzyx, passing `true` as the
// last argument in every call.
// Sits after `#ifdef RUN_ALL_MODEL_CACHING_TESTS`, so it is compiled only when
// that macro is defined.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
// NOTE(review): the "b_fs_zyx_fsv32 -> bfzyx" group passes
// format::b_fs_yx_fsv32 as the input format; verify the label or the argument.
// NOTE(review): the `|` / `||||` lines are table residue from a rendered diff,
// not C++.
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_f32_cached) {
|
||||
// b_fs_yx_fsv32 -> bfyx
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 64 + 5, 16 + 11, 3, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfyx, 3, 96 - 12, 16 + 4, 3, 0, 0, true);
|
||||
// b_fs_zyx_fsv32 -> bfzyx
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 3, 64 + 9, 16 - 1, 2, 8, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 30, 16 + 1, 3, 4, 0, true);
|
||||
// incremental dims
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfzyx, 2, 64 + 4, 24 - 1, 3, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv32, format::bfwzyx, 2, 64 + 2, 32 - 3, 4, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv32, format::bfwzyx, 1, 96 + 10, 32 - 3, 4, 3, 0, true);
|
||||
}
|
||||
|
||||
// "_cached" variant: calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" for b_fs_yx_fsv32 -> bfyx reorders
// with differing data types (f32 -> {u8, i64, f16}, i32 -> {i8, i64, f16}),
// passing `true` as the last argument in every call.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
// NOTE(review): the `|` / `||||` lines are table residue from a rendered diff,
// not C++.
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv32_to_bfyx_different_datatype_cached) {
|
||||
// f32 -> other types
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::u8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, true);
|
||||
// i32 -> other types
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i8, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 8 + 7, 2, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::i64, format::b_fs_yx_fsv32, format::bfyx, 2, 64, 16 + 2, 2, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::i32, data_types::f16, format::b_fs_yx_fsv32, format::bfyx, 1, 64, 16 + 1, 2, 0, 0, true);
|
||||
}
|
||||
|
||||
// "_cached" variant: calls compare_bfyx2blocked_with_ref with kernel name
// "reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx" for f32 -> f32 reorders from
// fsv16-blocked input formats (b_fs_yx_fsv16, b_fs_zyx_fsv16) to bfyx, bfzyx
// and bfwzyx, passing `true` as the last argument in every call.
// NOTE(review): the trailing bool is presumably the is_caching_test flag of the
// helper -- TODO confirm against its signature.
// NOTE(review): the `|` / `||||` lines are table residue from a rendered diff,
// not C++.
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_f32_cached) {
|
||||
// u-net
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 1, 64, 388, 388, 0, 0, true);
|
||||
// b_fs_yx_fsv16 -> bfyx
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 3, 48 + 1, 16, 3, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfyx, 2, 32 - 1, 24 - 1, 3, 0, 0, true);
|
||||
// b_fs_zyx_fsv16 -> bfzyx
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 5, 48 - 1, 16, 3, 8, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfzyx, 2, 32 + 1, 24 - 1, 3, 17, 0, true);
|
||||
// incremental dims
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfzyx, 3, 32 - 1, 24 - 1, 3, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_yx_fsv16, format::bfwzyx, 4, 16 + 1, 32 - 3, 4, 0, 0, true);
|
||||
compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx", data_types::f32, data_types::f32, format::b_fs_zyx_fsv16, format::bfwzyx, 3, 16 + 2, 32 - 3, 4, 9, 0, true);
|
||||
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__b_fs_yx_fsv16_to_bfyx_different_datatype_cached) {
    // "_cached" twin of the b_fs_yx_fsv16 -> bfyx data-type conversion checks.
    // Formats and numeric arguments are the same for every case; only the
    // (source type, destination type) pair changes.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    struct type_pair {
        data_types src_type;
        data_types dst_type;
    };

    const type_pair conversions[] = {
        // f32 -> other types
        { data_types::f32, data_types::u8 },
        { data_types::f32, data_types::i8 },
        { data_types::f32, data_types::i32 },
        { data_types::f32, data_types::i64 },
        { data_types::f32, data_types::f16 },
        // i32 -> other types
        { data_types::i32, data_types::u8 },
        { data_types::i32, data_types::i8 },
        { data_types::i32, data_types::i64 },
        { data_types::i32, data_types::f16 },
        { data_types::i32, data_types::f32 },
    };

    for (const auto& conv : conversions) {
        compare_bfyx2blocked_with_ref("reorder_data_b_fs_yx_fsv16_fsv32_to_bfyx",
                                      conv.src_type, conv.dst_type,
                                      format::b_fs_yx_fsv16, format::bfyx,
                                      2, 32, 16 + 7, 2, 0, 0, true);
    }
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_f32_cached) {
    // "_cached" twin of the f32 plain -> blocked format checks.
    // The table lists the format pair plus the five leading dimension arguments
    // handed to compare_bfyx2blocked_with_ref; kernel name and data types are fixed.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    struct layout_case {
        format::type src_fmt;
        format::type dst_fmt;
        int dim0;
        int dim1;
        int dim2;
        int dim3;
        int dim4;
    };

    const layout_case cases[] = {
        // bfyx_to_b_fs_yx_fsv4
        { format::bfyx,  format::b_fs_yx_fsv4,   4, 32,     16,     4, 0 },
        { format::bfyx,  format::b_fs_yx_fsv4,   3, 32 + 2, 32 + 3, 4, 0 },
        // bfyx_to_b_fs_yx_fsv16
        { format::bfyx,  format::b_fs_yx_fsv16,  2, 48,     8,      4, 0 },
        { format::bfyx,  format::b_fs_yx_fsv16,  3, 32 + 4, 16 + 7, 2, 0 },
        // bfyx to b_fs_yx_fsv32
        { format::bfyx,  format::b_fs_yx_fsv32,  2, 64,     64,     4, 0 },
        { format::bfyx,  format::b_fs_yx_fsv32,  4, 32 + 6, 96 - 4, 2, 0 },
        // bfyx to fs_b_yx_fsv32
        { format::bfyx,  format::fs_b_yx_fsv32,  2, 64,     8,      4, 0 },
        { format::bfyx,  format::fs_b_yx_fsv32,  3, 64 + 5, 8 + 7,  2, 0 },
        // bfzyx to b_fs_zyx_fsv16
        { format::bfzyx, format::b_fs_zyx_fsv16, 2, 48,     8,      4, 4 },
        { format::bfzyx, format::b_fs_zyx_fsv16, 3, 32 + 5, 16 + 7, 2, 2 },
        // bfzyx to b_fs_zyx_fsv32
        { format::bfzyx, format::b_fs_zyx_fsv32, 2, 64,     8,      4, 4 },
        { format::bfzyx, format::b_fs_zyx_fsv32, 3, 64 + 5, 8 + 7,  2, 2 },
    };

    for (const auto& c : cases) {
        compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format",
                                      data_types::f32, data_types::f32,
                                      c.src_fmt, c.dst_fmt,
                                      c.dim0, c.dim1, c.dim2, c.dim3, c.dim4, 0, true);
    }
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_cached) {
    // "_cached" twin of the f32 plain -> double-blocked (bsv16_fsv16) checks.
    // Kernel name and data types are fixed; the table carries the format pair and
    // the five leading dimension arguments passed to the helper.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    struct layout_case {
        format::type src_fmt;
        format::type dst_fmt;
        int dim0;
        int dim1;
        int dim2;
        int dim3;
        int dim4;
    };

    const layout_case cases[] = {
        // bfyx to double blocked format (bs_fs_yx_bsv16_fsv16)
        { format::bfyx,  format::bs_fs_yx_bsv16_fsv16,  32,     48,     8,      4, 0 },
        { format::bfyx,  format::bs_fs_yx_bsv16_fsv16,  32 + 2, 48,     16,     4, 0 },
        { format::bfyx,  format::bs_fs_yx_bsv16_fsv16,  32,     48 + 5, 16,     4, 0 },
        { format::bfyx,  format::bs_fs_yx_bsv16_fsv16,  32,     48,     48 + 3, 4, 0 },
        { format::bfyx,  format::bs_fs_yx_bsv16_fsv16,  32 + 2, 48 + 3, 16 + 1, 4, 0 },
        // bfzyx to double blocked format (bs_fs_zyx_bsv16_fsv16)
        { format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32,     48,     8,      4, 16 },
        { format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48,     16,     4, 2 },
        { format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32,     48 + 5, 16,     4, 3 },
        { format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32,     48,     48 + 3, 4, 4 },
        { format::bfzyx, format::bs_fs_zyx_bsv16_fsv16, 32 + 2, 48 + 3, 16 + 1, 4, 2 },
    };

    for (const auto& c : cases) {
        compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format",
                                      data_types::f32, data_types::f32,
                                      c.src_fmt, c.dst_fmt,
                                      c.dim0, c.dim1, c.dim2, c.dim3, c.dim4, 0, true);
    }
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv16_fsv32_cached) {
    // "_cached" twin of the f32 bfzyx -> bs_fs_zyx_bsv16_fsv32 checks.
    // Only the five leading dimension arguments differ between cases.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    const int dims[][5] = {
        { 3,      16,     4,      5, 7 },
        { 1,      1,      1,      1, 1 },
        { 32 + 2, 48,     16,     4, 2 },
        { 32 + 1, 1,      1,      1, 1 },
        { 32,     48 + 5, 16,     4, 3 },
        { 32,     48,     48 + 3, 4, 4 },
        { 32 + 2, 48 + 3, 16 + 1, 4, 2 },
    };

    for (const auto& d : dims) {
        compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format",
                                      data_types::f32, data_types::f32,
                                      format::bfzyx, format::bs_fs_zyx_bsv16_fsv32,
                                      d[0], d[1], d[2], d[3], d[4], 0, true);
    }
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv16_cached) {
    // "_cached" twin of the f32 bfzyx -> bs_fs_zyx_bsv32_fsv16 checks.
    // Only the five leading dimension arguments differ between cases.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    const int dims[][5] = {
        { 1,      1,      1,      1, 1 },
        { 32 + 2, 48,     16,     4, 2 },
        { 32,     48 + 5, 16,     4, 3 },
        { 32,     48,     48 + 3, 4, 4 },
        { 32 + 2, 48 + 3, 16 + 1, 4, 2 },
    };

    for (const auto& d : dims) {
        compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format",
                                      data_types::f32, data_types::f32,
                                      format::bfzyx, format::bs_fs_zyx_bsv32_fsv16,
                                      d[0], d[1], d[2], d[3], d[4], 0, true);
    }
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_double_blocked_f32_bsv32_fsv32_cached) {
    // "_cached" twin of the f32 bfzyx -> bs_fs_zyx_bsv32_fsv32 checks.
    // Only the five leading dimension arguments differ between cases.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    const int dims[][5] = {
        { 1,      1,      1,      1, 1 },
        { 32 + 2, 48,     16,     4, 2 },
        { 32,     48 + 5, 16,     4, 3 },
        { 32,     48,     48 + 3, 4, 4 },
        { 32 + 2, 48 + 3, 16 + 1, 4, 2 },
    };

    for (const auto& d : dims) {
        compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format",
                                      data_types::f32, data_types::f32,
                                      format::bfzyx, format::bs_fs_zyx_bsv32_fsv32,
                                      d[0], d[1], d[2], d[3], d[4], 0, true);
    }
}
|
||||
|
||||
TEST(reorder_gpu_optimization, compare_with_ref__bfyx_to_blocked_format_different_datatype_cached) {
    // "_cached" twin of the bfyx -> b_fs_yx_fsv16 data-type conversion checks.
    // Formats and numeric arguments are identical for every case; only the
    // (source type, destination type) pair changes.
    // NOTE(review): the trailing `true` is presumably the model-caching switch - confirm
    // against the helper's signature.
    struct type_pair {
        data_types src_type;
        data_types dst_type;
    };

    const type_pair conversions[] = {
        { data_types::f32, data_types::f16 },
        { data_types::i8,  data_types::f32 },
        { data_types::i64, data_types::f32 },
    };

    for (const auto& conv : conversions) {
        compare_bfyx2blocked_with_ref("reorder_data_bfyx_to_blocked_format",
                                      conv.src_type, conv.dst_type,
                                      format::bfyx, format::b_fs_yx_fsv16,
                                      3, 32 + 4, 16 + 7, 2, 0, 0, true);
    }
}
|
||||
|
||||
TEST_P(testing_removal_reorder, removal_reorder_1d_along_f_cached) {
    // Builds input -> reorder -> convolution -> reorder -> eltwise(sum with a reordered
    // bias) -> reorder, runs it with `true` as execute()'s second argument, and expects
    // the "reorder_bias1" primitive to be reported as optimized out.
    auto params = GetParam();

    create_topologies(
        input_layout("input", get_input_layout(params)),
        reorder("reorder_input", input_info("input"), format::b_fs_yx_fsv16, data_types::f16),
        data("weights", get_mem(get_weights_layout(params))),
        data("bias1", get_mem(get_bias_layout(params))),
        reorder("reorder_bias1", input_info("bias1"), format::b_fs_yx_fsv16, data_types::f16),
        convolution("conv_prim", input_info("reorder_input"), {"weights"}, std::vector<primitive_id>{}, 1, params.stride, params.pad),
        reorder("reorder_conv", input_info("conv_prim"), format::b_fs_yx_fsv16, data_types::f16),
        eltwise("add_bias1", { input_info("reorder_conv"), input_info("reorder_bias1") }, eltwise_mode::sum),
        reorder("reorder_bfyx", input_info("add_bias1"), params.default_format, data_types::f16));

    // NOTE(review): second argument is presumably the is_caching_test flag - confirm
    // against the fixture's execute() declaration.
    execute(params, true);

    ASSERT_EQ(check_optimized_out(params, "reorder_bias1"), true);
}
|
||||
#endif
|
||||
TEST_P(testing_removal_reorder, only_remove_reorder_shallow_depth_input_cached) {
    // Two-convolution topology with a u8 b_fs_yx_fsv32 reorder between them, followed
    // by a reorder, a resample and a final reorder. After running with `true` as
    // execute()'s second argument, "reorder_conv" must NOT be reported as optimized out.
    auto params = GetParam();
    layout reorder_layout(data_types::u8, format::b_fs_yx_fsv32, params.in_shape, padding({0, }, 0));

    create_topologies(
        input_layout("input", get_input_layout(params)),
        data("weights", get_mem(get_weights_layout(params))),
        data("bias", get_mem(get_bias_layout(params))),
        data("weights_sec", get_mem(get_weights_layout(params))),
        reorder("reorder_fp32", input_info("input"), format::bfyx, data_types::f32),
        convolution("conv_prim", input_info("reorder_fp32"), { "weights" }, { "bias" }, 1, params.stride, params.pad, {1, 1}, params.in_shape, data_types::u8, false),
        reorder("reorder_conv", input_info("conv_prim"), reorder_layout),
        convolution("conv_output", input_info("reorder_conv"), { "weights_sec" }, 1, params.stride, params.pad),
        reorder("reorder_bfyx", input_info("conv_output"), format::b_fs_yx_fsv32, data_types::f32),
        resample("resample", input_info("reorder_bfyx"), params.out_shape, 1),
        reorder("reorder_output", input_info("resample"), params.default_format, data_types::f32));

    // NOTE(review): second argument is presumably the is_caching_test flag - confirm
    // against the fixture's execute() declaration.
    execute(params, true);

    ASSERT_EQ(check_optimized_out(params, "reorder_conv"), false);
}
|
||||
|
@ -290,21 +290,21 @@ template<typename T>
|
||||
struct reorg_yolo_test
|
||||
: public ::testing::TestWithParam<ReorgYoloParamsWithLayout<T> > {
|
||||
public:
|
||||
void test() {
|
||||
void test(bool is_caching_test) {
|
||||
ReorgYoloParams<T> params;
|
||||
format::type target_format;
|
||||
bool should_fail;
|
||||
std::tie(params, target_format, should_fail) = this->GetParam();
|
||||
|
||||
if (should_fail) {
|
||||
ASSERT_ANY_THROW(run_test(params, target_format));
|
||||
ASSERT_ANY_THROW(run_test(params, target_format, is_caching_test));
|
||||
} else {
|
||||
ASSERT_NO_FATAL_FAILURE(run_test(params, target_format));
|
||||
ASSERT_NO_FATAL_FAILURE(run_test(params, target_format, is_caching_test));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void run_test(const ReorgYoloParams<T>& params, const format::type target_format) {
|
||||
void run_test(const ReorgYoloParams<T>& params, const format::type target_format, bool is_caching_test) {
|
||||
const auto data_type = type_to_data_type<T>::value;
|
||||
const format::type plain_format = format::bfyx;
|
||||
|
||||
@ -320,9 +320,9 @@ private:
|
||||
topology.add(reorg_yolo("reorg_yolo", input_info("input_reordered"), params.stride));
|
||||
topology.add(reorder("reorg_yolo_reordered", input_info("reorg_yolo"), plain_format, data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
const auto result = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
const auto result = network->execute();
|
||||
|
||||
auto out_mem = result.at("reorg_yolo_reordered").get_memory();
|
||||
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
|
||||
@ -339,11 +339,11 @@ using test_f32 = reorg_yolo_test<float>;
|
||||
using test_f16 = reorg_yolo_test<half_t>;
|
||||
|
||||
TEST_P(test_f32, basic) {
|
||||
test();
|
||||
test(false);
|
||||
}
|
||||
|
||||
TEST_P(test_f16, basic) {
|
||||
test();
|
||||
test(false);
|
||||
}
|
||||
|
||||
|
||||
@ -371,3 +371,12 @@ INSTANTIATE_TEST_SUITE_P(reorg_yolo_invalid_input,
|
||||
::testing::Values(format::bfyx),
|
||||
::testing::Values(true)),
|
||||
PrintToStringParamName());
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(test_f32, basic_cached) {
    // Same scenario as the fixture's regular run, but with is_caching_test enabled.
    constexpr bool is_caching_test = true;
    test(is_caching_test);
}
|
||||
#endif
|
||||
TEST_P(test_f16, basic_cached) {
    // Same scenario as the fixture's regular run, but with is_caching_test enabled.
    constexpr bool is_caching_test = true;
    test(is_caching_test);
}
|
||||
|
@ -12,7 +12,8 @@
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
TEST(resample_gpu, basic_in2x3x2x2_nearest) {
|
||||
template <typename T>
|
||||
void test_basic_in2x3x2x2_nearest(bool is_caching_test) {
|
||||
// Input : 2x2x3x2
|
||||
// Output : 2x2x6x4
|
||||
// Sample Type: Nearest
|
||||
@ -46,16 +47,16 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
|
||||
12.f, 9.f, -17.f,
|
||||
});
|
||||
|
||||
cldnn::network net{ engine, topology };
|
||||
cldnn::network::ptr net = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
net.set_input_data("input", input);
|
||||
net->set_input_data("input", input);
|
||||
|
||||
auto outputs = net.execute();
|
||||
auto outputs = net->execute();
|
||||
|
||||
auto output = outputs.at("upsampling").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
float answers[96] = {
|
||||
T answers[96] = {
|
||||
1.f, 1.f, 2.f, 2.f, -10.f, -10.f,
|
||||
1.f, 1.f, 2.f, 2.f, -10.f, -10.f,
|
||||
3.f, 3.f, 4.f, 4.f, -14.f, -14.f,
|
||||
@ -86,6 +87,10 @@ TEST(resample_gpu, basic_in2x3x2x2_nearest) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(resample_gpu, basic_in2x3x2x2_nearest) {
    // Runs the shared nearest-resample scenario for float with is_caching_test disabled.
    constexpr bool is_caching_test = false;
    test_basic_in2x3x2x2_nearest<float>(is_caching_test);
}
|
||||
|
||||
TEST(resample_gpu, basic_in2x3x2x2_bilinear) {
|
||||
// Input : 1x1x2x2
|
||||
// Output : 1x1x4x4
|
||||
@ -456,7 +461,7 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
|
||||
}
|
||||
}
|
||||
|
||||
void execute(const resample_random_test_params& params) {
|
||||
void execute(const resample_random_test_params& params, bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
|
||||
@ -467,26 +472,27 @@ struct resample_random_test : testing::TestWithParam<resample_random_test_params
|
||||
topo.add(prim);
|
||||
|
||||
ExecutionConfig config(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample", {params.out_format, ""}} }));
|
||||
cldnn::network net(engine, topo, config);
|
||||
cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto in_mem = engine.allocate_memory(in_layout);
|
||||
fill_random(in_mem);
|
||||
net.set_input_data("in", in_mem);
|
||||
net->set_input_data("in", in_mem);
|
||||
|
||||
auto result = net.execute();
|
||||
auto result = net->execute();
|
||||
auto output = result.at("resample").get_memory();
|
||||
|
||||
std::string kernel = "";
|
||||
for (auto& info : net.get_primitives_info()) {
|
||||
if (info.original_id == "resample")
|
||||
kernel = info.kernel_id;
|
||||
if (!is_caching_test) {
|
||||
for (auto& info : net->get_primitives_info()) {
|
||||
if (info.original_id == "resample")
|
||||
kernel = info.kernel_id;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(resample_random_test, random) {
|
||||
execute(GetParam());
|
||||
execute(GetParam(), false);
|
||||
}
|
||||
|
||||
struct resample_random_test_param_generator : std::vector<resample_random_test_params> {
|
||||
@ -611,7 +617,7 @@ struct caffe_resample_random_test : testing::TestWithParam<caffe_resample_random
|
||||
}
|
||||
}
|
||||
|
||||
void execute_compare(const caffe_resample_random_test_params& params, bool check_result) {
|
||||
void execute_compare(const caffe_resample_random_test_params& params, bool check_result, bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto in_layout = layout(params.input_type, params.in_format, params.input_size);
|
||||
@ -647,12 +653,12 @@ struct caffe_resample_random_test : testing::TestWithParam<caffe_resample_random
|
||||
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt"}));
|
||||
config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, "resample_opt"}} }));
|
||||
|
||||
cldnn::network net_opt(engine, topo_opt, config_opt);
|
||||
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
// Use in_mem from ref network
|
||||
net_opt.set_input_data("in", in_mem);
|
||||
net_opt->set_input_data("in", in_mem);
|
||||
|
||||
auto result_opt = net_opt.execute();
|
||||
auto result_opt = net_opt->execute();
|
||||
auto output_opt = result_opt.at("resample_opt").get_memory();
|
||||
|
||||
if (check_result == true) {
|
||||
@ -695,7 +701,7 @@ struct caffe_resample_random_test_param_generator : std::vector<caffe_resample_r
|
||||
|
||||
TEST_P(caffe_resample_random_test, random) {
|
||||
auto param = GetParam();
|
||||
execute_compare(param, true);
|
||||
execute_compare(param, true, false);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(caffe_smoke_caffe_fsv16,
|
||||
@ -2004,7 +2010,8 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
}
|
||||
}
|
||||
|
||||
void execute_compare(const resample_opt_random_test_params& params, bool check_result, const std::string& kernel = "resample_opt") {
|
||||
void execute_compare(const resample_opt_random_test_params& params, bool check_result,
|
||||
bool is_caching_test, const std::string& kernel = "resample_opt") {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const format origin_format = format::dimension(params.in_format) == 4 ? format::bfyx : format::bfzyx;
|
||||
@ -2042,13 +2049,13 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
ExecutionConfig config_opt;
|
||||
config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"resample_opt", "res_to_bfyx"}));
|
||||
|
||||
network net_opt(engine, topo_opt, config_opt);
|
||||
cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
// Use in_mem from ref network
|
||||
net_opt.set_input_data("in", in_mem);
|
||||
net_opt->set_input_data("in", in_mem);
|
||||
|
||||
// first execution of opt
|
||||
auto result_opt = net_opt.execute();
|
||||
auto result_opt = net_opt->execute();
|
||||
auto output_opt = result_opt.at("res_to_bfyx").get_memory();
|
||||
if (!format::is_simple_data_format(params.in_format)) {
|
||||
ASSERT_FALSE(format::is_simple_data_format(result_opt.at("resample_opt").get_memory()->get_layout().format));
|
||||
@ -2176,7 +2183,7 @@ struct resample_opt_random_test_ext : resample_opt_random_test
|
||||
|
||||
TEST_P(resample_opt_random_test, random) {
|
||||
auto param = GetParam();
|
||||
execute_compare(param, true);
|
||||
execute_compare(param, true, false);
|
||||
}
|
||||
|
||||
TEST_P(resample_opt_random_test_ext, DISABLED_random) {
|
||||
@ -2329,3 +2336,22 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_5d_3axes_simple,
|
||||
{ data_types::f16, {1, 16, 13, 13, 13}, {1, 16, 26, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(resample_random_test, random_cached) {
    // Runs the fixture's execute() on the current parameter set with is_caching_test enabled.
    constexpr bool is_caching_test = true;
    execute(GetParam(), is_caching_test);
}
|
||||
|
||||
TEST_P(caffe_resample_random_test, random_cached) {
    // Runs execute_compare() with result checking on and is_caching_test enabled.
    constexpr bool check_result = true;
    constexpr bool is_caching_test = true;
    auto test_params = GetParam();
    execute_compare(test_params, check_result, is_caching_test);
}
|
||||
|
||||
TEST_P(resample_opt_random_test, random_cached) {
    // Runs execute_compare() with result checking on and is_caching_test enabled;
    // the kernel argument is left at its default.
    constexpr bool check_result = true;
    constexpr bool is_caching_test = true;
    auto test_params = GetParam();
    execute_compare(test_params, check_result, is_caching_test);
}
|
||||
#endif
|
||||
TEST(resample_gpu, basic_in2x3x2x2_nearest_cached) {
    // Runs the shared nearest-resample scenario for float with is_caching_test enabled.
    constexpr bool is_caching_test = true;
    test_basic_in2x3x2x2_nearest<float>(is_caching_test);
}
|
||||
|
@ -26,7 +26,7 @@ void verify_int(const int32_t& output_value, const int32_t& value) {
|
||||
template <class ElemType>
|
||||
void generic_reshape_test(format fmt, tensor const& input_size, tensor const& reshape_size,
|
||||
bool /* in_place */, padding const& input_padd = padding(),
|
||||
padding const& output_padd = padding()) {
|
||||
padding const& output_padd = padding(), bool is_caching_test = false) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
//allocate input memory
|
||||
@ -68,9 +68,9 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{reshape_input, "reshape"}));
|
||||
|
||||
network net(engine, tpl, config);
|
||||
net.set_input_data("input", input);
|
||||
auto outputs = net.execute();
|
||||
cldnn::network::ptr net = get_network(engine, tpl, config, get_test_stream_ptr(), is_caching_test);
|
||||
net->set_input_data("input", input);
|
||||
auto outputs = net->execute();
|
||||
|
||||
ASSERT_TRUE(outputs.size() == 2 && outputs.count("reshape") == 1 && outputs.count(reshape_input) == 1);
|
||||
auto net_input = outputs.at(reshape_input).get_memory();
|
||||
@ -411,7 +411,8 @@ TEST(reshape_gpu_f32, basic_5dim_in_place) {
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
|
||||
template <typename T>
|
||||
void test_multiple_users_with_reorder(bool is_caching_test) {
|
||||
// Tests split with crop implementation
|
||||
// _ REORDER(yxfb) --> RELU(yxfb)
|
||||
// |
|
||||
@ -452,29 +453,34 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) {
|
||||
topology.add(activation("relu1", input_info("reorder1"), activation_func::relu));
|
||||
topology.add(activation("relu2", input_info("reshape"), activation_func::relu));
|
||||
|
||||
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<float> out1 = {0.f, 2.f, 0.f, 4.0f};
|
||||
std::vector<float> out2 = {0.f, 2.f, 0.f, 4.0f};
|
||||
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<T> out1 = {0.f, 2.f, 0.f, 4.0f};
|
||||
std::vector<T> out2 = {0.f, 2.f, 0.f, 4.0f};
|
||||
set_values(input, input_vec);
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("relu1").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < out1.size(); i++)
|
||||
ASSERT_EQ(output_ptr[i], out1[i]);
|
||||
|
||||
auto output_2 = outputs.at("relu2").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr_2(output_2, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr_2(output_2, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < out2.size(); i++)
|
||||
ASSERT_EQ(output_ptr_2[i], out2[i]);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, calc_output_shape) {
|
||||
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
    // Runs the shared multiple-users-with-reorder scenario for float with is_caching_test disabled.
    constexpr bool is_caching_test = false;
    test_multiple_users_with_reorder<float>(is_caching_test);
}
|
||||
|
||||
template <typename T>
|
||||
void test_calc_output_shape(bool is_caching_test) {
|
||||
// INPUT(bfyx,2x2x1x1) -- RESHAPE(1, 1, 0, -1)
|
||||
|
||||
// Input:
|
||||
@ -495,9 +501,9 @@ TEST(reshape_gpu_f32, calc_output_shape) {
|
||||
|
||||
set_values(input, {-1.f, 2.f, -3.f, 4.f});
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "reshape");
|
||||
@ -509,15 +515,20 @@ TEST(reshape_gpu_f32, calc_output_shape) {
|
||||
|
||||
ASSERT_TRUE(output->get_layout().get_tensor() == tensor(1, 1, 1, 4));
|
||||
|
||||
float answers[4] = {-1.f, 2.f, -3.f, 4.f};
|
||||
T answers[4] = {-1.f, 2.f, -3.f, 4.f};
|
||||
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
for (int i = 0; i < 4; i++) {
|
||||
ASSERT_TRUE(are_equal(answers[i], output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_bfwzyx) {
|
||||
TEST(reshape_gpu_f32, calc_output_shape) {
    // Runs the shared calc-output-shape scenario for float with is_caching_test disabled.
    constexpr bool is_caching_test = false;
    test_calc_output_shape<float>(is_caching_test);
}
|
||||
|
||||
template <typename T>
|
||||
void test_basic_bfwzyx(bool is_caching_test) {
|
||||
// input: bfwzyx, (3, 3, 2, 2, 1, 1)
|
||||
// reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1)
|
||||
|
||||
@ -562,9 +573,9 @@ TEST(reshape_gpu_f32, basic_bfwzyx) {
|
||||
|
||||
set_values(input, input_data);
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "reshape");
|
||||
@ -582,7 +593,12 @@ TEST(reshape_gpu_f32, basic_bfwzyx) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, shrink_chain_partial) {
|
||||
TEST(reshape_gpu_f32, basic_bfwzyx) {
    // Runs the shared bfwzyx reshape scenario for float with is_caching_test disabled.
    constexpr bool is_caching_test = false;
    test_basic_bfwzyx<float>(is_caching_test);
}
|
||||
|
||||
template <typename T>
|
||||
void test_shrink_chain_partial(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
auto batch_num = 2;
|
||||
auto feature_num = 2;
|
||||
@ -592,8 +608,8 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
|
||||
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
|
||||
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
|
||||
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
|
||||
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
|
||||
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
|
||||
set_values(scale_in, scale_vals);
|
||||
set_values(shift_in, scale_shifts);
|
||||
|
||||
@ -609,8 +625,53 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
|
||||
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
|
||||
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
|
||||
|
||||
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<float> out = {5.f, 12.f, 15.f, 32.0f};
|
||||
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
|
||||
set_values(input, input_vec);
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("out_reorder").get_memory();
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < out.size(); i++)
|
||||
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, shrink_chain_partial) {
|
||||
test_shrink_chain_partial<float>(false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void test_shrink_chain_full(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
|
||||
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
|
||||
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
|
||||
set_values(scale_in, scale_vals);
|
||||
set_values(shift_in, scale_shifts);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(data("scale_in", scale_in));
|
||||
topology.add(data("shift_in", shift_in));
|
||||
topology.add(activation("relu", input_info("input"), activation_func::relu));
|
||||
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
|
||||
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
|
||||
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
|
||||
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
|
||||
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
|
||||
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
|
||||
|
||||
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<T> out = {5.f, 12.f, 15.f, 32.0f};
|
||||
set_values(input, input_vec);
|
||||
|
||||
ExecutionConfig config;
|
||||
@ -620,85 +681,54 @@ TEST(reshape_gpu_f32, shrink_chain_partial) {
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("out_reorder").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < out.size(); i++)
|
||||
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, shrink_chain_full) {
|
||||
test_shrink_chain_full<float>(false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void test_shrink_chain_out(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
|
||||
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
|
||||
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
|
||||
std::vector<T> scale_vals = {0.f, 1.f, 2.f, 3.f};
|
||||
std::vector<T> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
|
||||
set_values(scale_in, scale_vals);
|
||||
set_values(shift_in, scale_shifts);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(data("scale_in", scale_in));
|
||||
topology.add(data("shift_in", shift_in));
|
||||
topology.add(activation("relu", input_info("input"), activation_func::relu));
|
||||
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
|
||||
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
|
||||
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
|
||||
topology.add(eltwise("scale", { input_info("reshape1"), input_info("scale_in") }, eltwise_mode::prod));
|
||||
topology.add(eltwise("shift", { input_info("scale"), input_info("shift_in") }, eltwise_mode::sum));
|
||||
topology.add(reorder("out_reorder", input_info("shift"), format::yxfb, data_types::f32));
|
||||
|
||||
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<float> out = {5.f, 12.f, 15.f, 32.0f};
|
||||
std::vector<T> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<T> out = {0.f, 2.f, 0.f, 4.0f};
|
||||
set_values(input, input_vec);
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("out_reorder").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
auto output = outputs.at("reshape1").get_memory();
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < out.size(); i++)
|
||||
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, shrink_chain_out) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto scale_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
auto shift_in = engine.allocate_memory({data_types::f32, format::bfyx, { tensor(feature(4)) }});
|
||||
|
||||
std::vector<float> scale_vals = {0.f, 1.f, 2.f, 3.f};
|
||||
std::vector<float> scale_shifts = {5.f, 10.f, 15.f, 20.0f};
|
||||
set_values(scale_in, scale_vals);
|
||||
set_values(shift_in, scale_shifts);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input->get_layout()));
|
||||
topology.add(activation("relu", input_info("input"), activation_func::relu));
|
||||
topology.add(reshape("reshape", input_info("relu"), tensor(spatial(2, 2))));
|
||||
topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));
|
||||
topology.add(reshape("reshape1", input_info("reorder"), tensor(feature(4))));
|
||||
|
||||
std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
|
||||
std::vector<float> out = {0.f, 2.f, 0.f, 4.0f};
|
||||
set_values(input, input_vec);
|
||||
|
||||
ExecutionConfig config;
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
network network(engine, topology, config);
|
||||
network.set_input_data("input", input);
|
||||
auto outputs = network.execute();
|
||||
|
||||
auto output = outputs.at("reshape1").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
|
||||
for (size_t i = 0; i < out.size(); i++)
|
||||
ASSERT_EQ(output_ptr[i], out[i]) << " i=" << i;
|
||||
test_shrink_chain_out<float>(false);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_runtime_static_shape) {
|
||||
@ -910,3 +940,369 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) {
|
||||
ASSERT_TRUE(are_equal(input_data[i], output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST(reshape_gpu_f32, basic_2dim_in_place_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::bfyx,
|
||||
tensor(1, 1, 2, 2),
|
||||
tensor(1, 1, 4, 1),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_2dim_in_place_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::bfyx,
|
||||
tensor(1, 1, 2, 2),
|
||||
tensor(1, 1, 1, 4),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i8, basic_2dim_in_place_cached) {
|
||||
generic_reshape_test<int8_t>(
|
||||
format::bfyx,
|
||||
tensor(1, 1, 2, 2),
|
||||
tensor(1, 1, 1, 4),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i32, basic_2dim_in_place_cached) {
|
||||
generic_reshape_test<int32_t>(
|
||||
format::bfyx,
|
||||
tensor(1, 1, 2, 2),
|
||||
tensor(1, 1, 1, 4),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i64, basic_2dim_in_place_cached) {
|
||||
generic_reshape_test<int64_t>(
|
||||
format::bfyx,
|
||||
tensor(1, 1, 2, 2),
|
||||
tensor(1, 1, 1, 4),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_4dim_in_place_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::yxfb,
|
||||
tensor(9, 9, 2, 4),
|
||||
tensor(27, 2, 3, 4),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_4dim_in_place_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::yxfb,
|
||||
tensor(9, 9, 2, 4),
|
||||
tensor(3, 4, 27, 2),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i32, basic_4dim_in_place_cached) {
|
||||
generic_reshape_test<int32_t>(
|
||||
format::yxfb,
|
||||
tensor(9, 9, 2, 4),
|
||||
tensor(3, 4, 27, 2),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i64, basic_4dim_in_place_cached) {
|
||||
generic_reshape_test<int64_t>(
|
||||
format::yxfb,
|
||||
tensor(9, 9, 2, 4),
|
||||
tensor(3, 4, 27, 2),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshpape_gpu_f32, basic_2dim_output_padd_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 4, 2),
|
||||
tensor(1, 1, 8, 1),
|
||||
false,
|
||||
padding(),
|
||||
padding(std::vector<int>{0, 0, 1, 1}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_2dim_output_padd_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 3, 4),
|
||||
tensor(1, 1, 2, 6),
|
||||
false,
|
||||
padding(),
|
||||
padding(std::vector<int>{0, 0, 2, 2}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i8, basic_2dim_output_padd_cached) {
|
||||
generic_reshape_test<int8_t>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 3, 4),
|
||||
tensor(1, 1, 2, 6),
|
||||
false,
|
||||
padding(),
|
||||
padding(std::vector<int>{0, 0, 2, 2}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i32, basic_2dim_output_padd_cached) {
|
||||
generic_reshape_test<int32_t>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 3, 4),
|
||||
tensor(1, 1, 2, 6),
|
||||
false,
|
||||
padding(),
|
||||
padding(std::vector<int>{0, 0, 2, 2}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i64, basic_2dim_output_padd_cached) {
|
||||
generic_reshape_test<int64_t>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 3, 4),
|
||||
tensor(1, 1, 2, 6),
|
||||
false,
|
||||
padding(),
|
||||
padding(std::vector<int>{0, 0, 2, 2}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_2dim_input_padd_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::fyxb,
|
||||
tensor(1, 1, 2, 5),
|
||||
tensor(1, 1, 5, 2),
|
||||
false,
|
||||
padding({0, 0, 3, 2}, {0, 0, 1, 4}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_2dim_input_padd_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::fyxb,
|
||||
tensor(1, 1, 3, 3),
|
||||
tensor(1, 1, 1, 9),
|
||||
false,
|
||||
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i8, basic_2dim_input_padd_cached) {
|
||||
generic_reshape_test<int8_t>(
|
||||
format::fyxb,
|
||||
tensor(1, 1, 3, 3),
|
||||
tensor(1, 1, 1, 9),
|
||||
false,
|
||||
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i32, basic_2dim_input_padd_cached) {
|
||||
generic_reshape_test<int32_t>(
|
||||
format::fyxb,
|
||||
tensor(1, 1, 3, 3),
|
||||
tensor(1, 1, 1, 9),
|
||||
false,
|
||||
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i64, basic_2dim_input_padd_cached) {
|
||||
generic_reshape_test<int64_t>(
|
||||
format::fyxb,
|
||||
tensor(1, 1, 3, 3),
|
||||
tensor(1, 1, 1, 9),
|
||||
false,
|
||||
padding({0, 0, 4, 1}, {0, 0, 2, 3}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_2dim_input_output_padd_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 5, 7),
|
||||
tensor(1, 1, 7, 5),
|
||||
false,
|
||||
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
|
||||
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_2dim_input_output_padd_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 6, 6),
|
||||
tensor(1, 1, 3, 12),
|
||||
false,
|
||||
padding({0, 0, 1, 1}, {0, 0, 0, 0}),
|
||||
padding({0, 0, 2, 1}, {0, 0, 1, 2}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i8, basic_2dim_input_output_padd_cached) {
|
||||
generic_reshape_test<int8_t>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 5, 7),
|
||||
tensor(1, 1, 7, 5),
|
||||
false,
|
||||
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
|
||||
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i32, basic_2dim_input_output_padd_cached) {
|
||||
generic_reshape_test<int32_t>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 5, 7),
|
||||
tensor(1, 1, 7, 5),
|
||||
false,
|
||||
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
|
||||
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_i64, basic_2dim_input_output_padd_cached) {
|
||||
generic_reshape_test<int64_t>(
|
||||
format::byxf,
|
||||
tensor(1, 1, 5, 7),
|
||||
tensor(1, 1, 7, 5),
|
||||
false,
|
||||
padding({0, 0, 4, 4}, {0, 0, 1, 1}),
|
||||
padding({0, 0, 0, 0}, {0, 0, 3, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshpape_gpu_f32, basic_4dim_output_padd_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::bfyx,
|
||||
tensor(2, 5, 7, 3),
|
||||
tensor(1, 14, 15, 1),
|
||||
false,
|
||||
padding(),
|
||||
padding({1, 0, 0, 1}, {0, 2, 3, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_4dim_output_padd_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::bfyx,
|
||||
tensor(5, 4, 2, 2),
|
||||
tensor(40, 2, 1, 1),
|
||||
false,
|
||||
padding(),
|
||||
padding({0, 2, 0, 1}, {0, 2, 3, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_4dim_input_padd_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::yxfb,
|
||||
tensor(8, 128, 3, 3),
|
||||
tensor(16, 8, 8, 9),
|
||||
false,
|
||||
padding({0, 1, 3, 3}, {0, 1, 1, 1}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_4dim_input_padd_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::yxfb,
|
||||
tensor(2, 32, 8, 8),
|
||||
tensor(8, 128, 1, 4),
|
||||
false,
|
||||
padding({2, 2, 1, 0}, {1, 2, 2, 0}),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_4dim_input_output_padd_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::fyxb,
|
||||
tensor(8, 1024, 25, 25),
|
||||
tensor(8, 64, 100, 100),
|
||||
false,
|
||||
padding({2, 0, 2, 1}, {0, 1, 4, 0}),
|
||||
padding({1, 2, 3, 4}, {0, 4, 1, 1}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f16, basic_4dim_input_output_padd_cached) {
|
||||
generic_reshape_test<FLOAT16>(
|
||||
format::byxf,
|
||||
tensor(32, 3, 227, 227),
|
||||
tensor(8, 12, 227, 227),
|
||||
false,
|
||||
padding({0, 1, 4, 4}, {0, 1, 1, 1}),
|
||||
padding({0, 29, 29, 0}, {0, 0, 0, 0}),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_5dim_in_place_cached) {
|
||||
generic_reshape_test<float>(
|
||||
format::bfzyx,
|
||||
tensor(9, 9, 2, 4, 2),
|
||||
tensor(27, 2, 1, 4, 6),
|
||||
true,
|
||||
padding(),
|
||||
padding(),
|
||||
true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, multiple_users_with_reorder_cached) {
|
||||
test_multiple_users_with_reorder<float>(true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, calc_output_shape_cached) {
|
||||
test_calc_output_shape<float>(true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, basic_bfwzyx_cached) {
|
||||
test_basic_bfwzyx<float>(true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, shrink_chain_partial_cached) {
|
||||
test_shrink_chain_partial<float>(true);
|
||||
}
|
||||
|
||||
TEST(reshape_gpu_f32, shrink_chain_full_cached) {
|
||||
test_shrink_chain_full<float>(true);
|
||||
}
|
||||
#endif
|
||||
TEST(reshape_gpu_f32, shrink_chain_out_cached) {
|
||||
test_shrink_chain_out<float>(true);
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ struct ReverseParams {
|
||||
template <typename T, reverse_mode mode>
|
||||
struct reverse_gpu_test : public ::testing::TestWithParam<ReverseParams<T, mode>> {
|
||||
public:
|
||||
void test() {
|
||||
void test(bool is_caching_test = false) {
|
||||
auto data_type = type_to_data_type<T>::value;
|
||||
ReverseParams<T, mode> params = testing::TestWithParam<ReverseParams<T, mode>>::GetParam();
|
||||
auto& engine = get_test_engine();
|
||||
@ -76,10 +76,10 @@ public:
|
||||
tp.add(reverse(reverse_id, input_info(reverse_input_id), input_info(axes_id), mode));
|
||||
}
|
||||
|
||||
network network(engine, tp);
|
||||
network.set_input_data(reverse_input_id, reverse_input);
|
||||
network.set_input_data(axes_id, reverse_axes);
|
||||
auto result = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data(reverse_input_id, reverse_input);
|
||||
network->set_input_data(axes_id, reverse_axes);
|
||||
auto result = network->execute();
|
||||
|
||||
auto out_mem = result.at(ouput_op_name).get_memory();
|
||||
cldnn::mem_lock<T> out_ptr(out_mem, get_test_stream());
|
||||
@ -422,3 +422,52 @@ INSTANTIATE_TEST_SUITE_P(smoke_reverse_f16_index,
|
||||
reverse_gpu_test_f16_index,
|
||||
::testing::ValuesIn(generateIndexParams<half_t>()),
|
||||
PrintToStringParamName());
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST_P(reverse_gpu_test_int32_mask, reverse_i32_mask_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_int32_index, reverse_i32_index_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_int64_mask, reverse_i64_mask_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_int64_index, reverse_i64_index_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_float_mask, reverse_float_mask_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_float_index, reverse_float_index_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_int8_mask, reverse_int8_mask_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_int8_index, reverse_int8_index_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_uint8_mask, reverse_uint8_mask_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_uint8_index, reverse_uint8_index_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
TEST_P(reverse_gpu_test_f16_mask, reverse_f16_mask_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
#endif
|
||||
TEST_P(reverse_gpu_test_f16_index, reverse_f16_index_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
@ -12,7 +12,8 @@
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
|
||||
template <typename T>
|
||||
void test_fp32_d2_2_ba1_sa0(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
|
||||
@ -35,17 +36,17 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
|
||||
reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis)
|
||||
);
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("seq_lengths", seq_lengths);
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("seq_lengths", seq_lengths);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("reverse_sequence").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
std::vector<float> expected_results = {
|
||||
std::vector<T> expected_results = {
|
||||
0.0f, 3.0f, 2.0f, 1.0f
|
||||
};
|
||||
|
||||
@ -54,7 +55,12 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
|
||||
TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
|
||||
test_fp32_d2_2_ba1_sa0<float>(false);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void test_fp32_d3_3_3_ba0_sa1(bool is_caching_test) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
|
||||
@ -79,17 +85,17 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
|
||||
reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis)
|
||||
);
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("seq_lengths", seq_lengths);
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("seq_lengths", seq_lengths);
|
||||
|
||||
auto outputs = network.execute();
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("reverse_sequence").get_memory();
|
||||
cldnn::mem_lock<float> output_ptr(output, get_test_stream());
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
|
||||
std::vector<float> expected_results = {
|
||||
std::vector<T> expected_results = {
|
||||
3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
|
||||
12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
|
||||
21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
|
||||
@ -100,6 +106,10 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
|
||||
test_fp32_d3_3_3_ba0_sa1<float>(false);
|
||||
}
|
||||
|
||||
TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
@ -603,3 +613,12 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
|
||||
ASSERT_EQ(expected_results[i], half_to_float(output_ptr[i]));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0_cached) {
|
||||
test_fp32_d2_2_ba1_sa0<float>(true);
|
||||
}
|
||||
#endif
|
||||
TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1_cached) {
|
||||
test_fp32_d3_3_3_ba0_sa1<float>(true);
|
||||
}
|
||||
|
@ -66,7 +66,8 @@ struct roi_align_test : public testing::Test {
|
||||
|
||||
void execute(const std::vector<TD>& expected_output,
|
||||
roi_align::PoolingMode pooling_mode,
|
||||
roi_align::AlignedMode aligned_mode) const {
|
||||
roi_align::AlignedMode aligned_mode,
|
||||
bool is_caching_test) const {
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
auto input = get_memory(engine, input_lt, input_data);
|
||||
@ -90,12 +91,13 @@ struct roi_align_test : public testing::Test {
|
||||
aligned_mode));
|
||||
topology.add(reorder("out", input_info("roi_align"), plain_format, device_data_type));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
network.set_input_data("coords", coords);
|
||||
network.set_input_data("roi_ind", roi_ind);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
auto outputs = network.execute();
|
||||
network->set_input_data("input", input);
|
||||
network->set_input_data("coords", coords);
|
||||
network->set_input_data("roi_ind", roi_ind);
|
||||
|
||||
auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("out").get_memory();
|
||||
cldnn::mem_lock<TD> output_ptr(output, get_test_stream());
|
||||
@ -158,19 +160,41 @@ TYPED_TEST(roi_align_test, avg_asymmetric) {
|
||||
using TD = typename TypeParam::DataType;
|
||||
const std::vector<TD>
|
||||
expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric);
|
||||
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric, false);
|
||||
}
|
||||
|
||||
TYPED_TEST(roi_align_test, avg_half_pixel_for_nn) {
|
||||
using TD = typename TypeParam::DataType;
|
||||
const std::vector<TD> expected_output =
|
||||
{TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn);
|
||||
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn, false);
|
||||
}
|
||||
|
||||
TYPED_TEST(roi_align_test, max_half_pixel) {
|
||||
using TD = typename TypeParam::DataType;
|
||||
const std::vector<TD> expected_output =
|
||||
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel);
|
||||
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, false);
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
TYPED_TEST(roi_align_test, avg_asymmetric_cached) {
|
||||
using TD = typename TypeParam::DataType;
|
||||
const std::vector<TD>
|
||||
expected_output{TD(3.f), TD(3.75f), TD(4.75f), TD(5.f), TD(3.f), TD(5.5f), TD(2.75f), TD(3.75f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::asymmetric, true);
|
||||
}
|
||||
|
||||
TYPED_TEST(roi_align_test, avg_half_pixel_for_nn_cached) {
|
||||
using TD = typename TypeParam::DataType;
|
||||
const std::vector<TD> expected_output =
|
||||
{TD(3.14f), TD(2.16f), TD(2.86f), TD(5.03f), TD(1.83f), TD(5.84f), TD(2.77f), TD(3.44f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::avg, roi_align::AlignedMode::half_pixel_for_nn, true);
|
||||
}
|
||||
#endif
|
||||
TYPED_TEST(roi_align_test, max_half_pixel_cached) {
|
||||
using TD = typename TypeParam::DataType;
|
||||
const std::vector<TD> expected_output =
|
||||
{TD(4.375f), TD(4.9375f), TD(5.6875f), TD(5.625f), TD(4.625f), TD(7.125f), TD(3.3125f), TD(4.3125f)};
|
||||
this->execute(expected_output, roi_align::PoolingMode::max, roi_align::AlignedMode::half_pixel, true);
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ using roi_pooling_test_params = std::tuple<roi_pooling_test_inputs<T>,
|
||||
template <class T>
|
||||
struct roi_pooling_gpu_test : public testing::TestWithParam<roi_pooling_test_params<T>> {
|
||||
public:
|
||||
void test() {
|
||||
void test(bool is_caching_test) {
|
||||
format::type fmt;
|
||||
pooling_mode mode;
|
||||
bool position_sensitive;
|
||||
@ -185,11 +185,12 @@ public:
|
||||
|
||||
topology.add(reorder("reordered_roi_pooling", input_info("roi_pooling"), plane_format, type_to_data_type<T>::value));
|
||||
|
||||
network network(engine, topology);
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
for (auto& input : inputs) {
|
||||
network.set_input_data(input.first, input.second);
|
||||
network->set_input_data(input.first, input.second);
|
||||
}
|
||||
const auto outputs = network.execute();
|
||||
const auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "reordered_roi_pooling");
|
||||
@ -236,7 +237,11 @@ public:
|
||||
using roi_pooling_gpu_test_float = roi_pooling_gpu_test<float>;
|
||||
|
||||
TEST_P(roi_pooling_gpu_test_float, test) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
ASSERT_NO_FATAL_FAILURE(test(false));
|
||||
}
|
||||
|
||||
TEST_P(roi_pooling_gpu_test_float, test_cached) {
|
||||
ASSERT_NO_FATAL_FAILURE(test(true));
|
||||
}
|
||||
|
||||
const std::vector<roi_pooling_test_inputs<float>> roi_pooling_max_inputs = {
|
||||
|
@ -37,7 +37,7 @@ using roll_test_params = std::tuple<roll_test_input<T>, format::type>;
|
||||
|
||||
template <class T>
|
||||
struct roll_test : testing::TestWithParam<roll_test_params<T>> {
|
||||
void test() {
|
||||
void test(bool is_caching_test) {
|
||||
roll_test_input<T> p;
|
||||
format::type input_format;
|
||||
std::tie(p, input_format) = testing::TestWithParam<roll_test_params<T>>::GetParam();
|
||||
@ -54,9 +54,9 @@ struct roll_test : testing::TestWithParam<roll_test_params<T>> {
|
||||
topology.add(roll("roll", input_info("reordered_input"), tensor(input_format, p.shift)));
|
||||
topology.add(reorder("reordered_roll", input_info("roll"), plane_format, type_to_data_type<T>::value));
|
||||
|
||||
network network(engine, topology);
|
||||
network.set_input_data("input", input);
|
||||
const auto outputs = network.execute();
|
||||
cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
|
||||
network->set_input_data("input", input);
|
||||
const auto outputs = network->execute();
|
||||
|
||||
auto output = outputs.at("reordered_roll").get_memory();
|
||||
cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
@ -226,7 +226,7 @@ std::vector<format::type> formats6d = {format::bfwzyx};
|
||||
#define INSTANTIATE_ROLL_TEST_SUITE(type, func, formats) \
|
||||
class roll_test_##type##func : public roll_test<type> {}; \
|
||||
TEST_P(roll_test_##type##func, roll_##type##func) { \
|
||||
test(); \
|
||||
test(false); \
|
||||
} \
|
||||
INSTANTIATE_TEST_SUITE_P(roll_smoke_##type##func, \
|
||||
roll_test_##type##func, \
|
||||
@ -257,4 +257,33 @@ INSTANTIATE_ROLL_TEST_SUITE(float, getRollFloatingPointAdditionalLogic, {format:
|
||||
|
||||
#undef INSTANTIATE_ROLL_TEST_SUITE
|
||||
|
||||
#define INSTANTIATE_ROLL_TEST_SUITE_CACHED(type, func) \
|
||||
TEST_P(roll_test_##type##func, roll_##type##func##_cached) { \
|
||||
test(true); \
|
||||
}
|
||||
|
||||
#ifdef RUN_ALL_MODEL_CACHING_TESTS
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParamsToCheckLogic)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParamsToCheckLogic)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParamsToCheckLogic)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParamsToCheckLogic)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParamsToCheckLayouts)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParamsToCheckLayouts)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParamsToCheckLayouts)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParamsToCheckLayouts)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParams5D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParams5D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParams5D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParams5D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int8_t, getRollParams6D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(uint8_t, getRollParams6D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int32_t, getRollParams6D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(int64_t, getRollParams6D)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(FLOAT16, getRollFloatingPointParams)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointParams)
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(FLOAT16, getRollFloatingPointAdditionalLogic)
|
||||
#endif
|
||||
INSTANTIATE_ROLL_TEST_SUITE_CACHED(float, getRollFloatingPointAdditionalLogic)
|
||||
|
||||
#undef INSTANTIATE_ROLL_TEST_SUITE_CACHED
|
||||
} // namespace
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user