[GPU] Optimize out Gather by converting to implicit crop (#17743)
+ Changed Gather if it divides input tensor along batch axis
+ Converted Gather to cldnn Crop in CreateGatherOpBase
+ Added implicit Crop condition for batch axis

Signed-off-by: Min, Byungil <byungil.min@intel.com>
This commit is contained in:
@@ -317,6 +317,72 @@ static bool can_reshape_be_optimized(const reshape_node& node) {
|
||||
return node.is_in_place() && !node.has_fused_primitives();
|
||||
}
|
||||
|
||||
static bool is_optimizable_padding_for_crop(const crop_node& node) {
|
||||
const auto& crop_layout = node.get_output_layout();
|
||||
auto input_layout = node.get_dependency(0).get_output_layout();
|
||||
auto crop_prim = node.get_primitive();
|
||||
auto opt_lower_pad = crop_prim->offsets.feature[0];
|
||||
auto opt_upper_pad = input_layout.feature() - crop_prim->offsets.feature[0] - crop_layout.get_tensor().feature[0];
|
||||
|
||||
// do not optimize crop if paddings are not properly aligned
|
||||
for (auto& usr : node.get_users()) {
|
||||
auto usr_layout = usr->get_output_layout();
|
||||
if (usr_layout.format == format::b_fs_yx_fsv16 &&
|
||||
(opt_lower_pad % 16 != 0 || opt_upper_pad % 16 != 0))
|
||||
return false;
|
||||
|
||||
if (input_layout.data_padding.lower_size().batch[0] != 0 || input_layout.data_padding.upper_size().batch[0] != 0 ||
|
||||
input_layout.data_padding.lower_size().spatial[0] != 0 || input_layout.data_padding.upper_size().spatial[0] != 0 ||
|
||||
input_layout.data_padding.lower_size().spatial[1] != 0 || input_layout.data_padding.upper_size().spatial[1] != 0)
|
||||
return false;
|
||||
|
||||
// oneDNN doesn't support paddings
|
||||
if (usr->get_preferred_impl_type() == impl_types::onednn)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool can_crop_be_optimized_along_feature(const crop_node& node) {
|
||||
const auto& crop_layout = node.get_output_layout();
|
||||
auto format = crop_layout.format;
|
||||
auto input_layout = node.get_dependency(0).get_output_layout();
|
||||
const auto& crop_size = crop_layout.get_tensor();
|
||||
const auto& out_pad = crop_layout.data_padding;
|
||||
|
||||
if (format == format::bfyx && crop_size.batch[0] == input_layout.batch() &&
|
||||
crop_size.spatial[0] == input_layout.spatial(0) &&
|
||||
crop_size.spatial[1] == input_layout.spatial(1) && out_pad.lower_size().feature[0] == 0 &&
|
||||
out_pad.upper_size().feature[0] == 0 && out_pad.lower_size().batch[0] == 0 &&
|
||||
out_pad.upper_size().batch[0] == 0 && out_pad.lower_size().spatial[0] == 0 &&
|
||||
out_pad.lower_size().spatial[1] == 0 && out_pad.upper_size().spatial[0] == 0 &&
|
||||
out_pad.upper_size().spatial[1] == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool can_crop_be_optimized_along_batch(const crop_node& node) {
|
||||
const auto& crop_layout = node.get_output_layout();
|
||||
auto format = crop_layout.format;
|
||||
auto input_layout = node.get_dependency(0).get_output_layout();
|
||||
const auto crop_shape = crop_layout.get_ordered_dims();
|
||||
const auto input_shape = input_layout.get_ordered_dims();
|
||||
const auto& in_padding = input_layout.data_padding;
|
||||
const auto& out_padding = crop_layout.data_padding;
|
||||
|
||||
// Check format's order is 'bxxx' and only batch size is different
|
||||
if (format::is_simple_data_format(format) && format::traits(format)._order[0] == 0 &&
|
||||
std::equal(input_shape.begin()+1, input_shape.end(), crop_shape.begin()+1) &&
|
||||
!out_padding && !in_padding) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void propagate_padding_to_opt_out_users(program_node& node, cldnn::padding padding_data) {
|
||||
if (padding_data == cldnn::padding())
|
||||
return;
|
||||
@@ -366,6 +432,7 @@ void prepare_buffer_fusing::run(program& p) {
|
||||
|
||||
if (!can_optimize(node))
|
||||
continue;
|
||||
|
||||
// zero copy
|
||||
program_helpers::do_for_types<crop>(*node, [&p](crop_node& node) {
|
||||
// if the node is marked as network output, prevent optimizations which would affect a form of its output,
|
||||
@@ -392,56 +459,38 @@ void prepare_buffer_fusing::run(program& p) {
|
||||
if (p.is_loop_body() && node.get_dependency(0).is_type<lstm_elt>()) {
|
||||
return;
|
||||
}
|
||||
// optimization is available for cropping across depth(features) only
|
||||
|
||||
// optimization is available for cropping across depth(features) or batch
|
||||
// if output padding has defined padding across features already it wouldn't
|
||||
// work because it expect to have zeros in the padded area.
|
||||
if (!is_optimizable_padding_for_crop(node))
|
||||
return;
|
||||
|
||||
const auto& crop_layout = node.get_output_layout();
|
||||
auto format = crop_layout.format;
|
||||
auto crop_prim = node.get_primitive();
|
||||
auto input_layout = node.get_input_layout(0);
|
||||
const auto& crop_size = crop_layout.get_tensor();
|
||||
const auto& out_padd = crop_layout.data_padding;
|
||||
auto opt_lower_pad = crop_prim->offsets.feature[0];
|
||||
auto opt_upper_pad = input_layout.feature() - crop_prim->offsets.feature[0] - crop_size.feature[0];
|
||||
const auto& out_pad = crop_layout.data_padding;
|
||||
auto input_layout = node.get_input_layout(0);
|
||||
auto crop_prim = node.get_primitive();
|
||||
|
||||
// do not optimize crop if paddings are not properly aligned
|
||||
for (auto& usr : node.get_users()) {
|
||||
auto usr_layout = usr->get_output_layout();
|
||||
if (usr_layout.format == format::b_fs_yx_fsv16 &&
|
||||
(opt_lower_pad % 16 != 0 || opt_upper_pad % 16 != 0))
|
||||
return;
|
||||
if (input_layout.data_padding.lower_size().batch[0] != 0 || input_layout.data_padding.upper_size().batch[0] != 0 ||
|
||||
input_layout.data_padding.lower_size().spatial[0] != 0 || input_layout.data_padding.upper_size().spatial[0] != 0 ||
|
||||
input_layout.data_padding.lower_size().spatial[1] != 0 || input_layout.data_padding.upper_size().spatial[1] != 0)
|
||||
return;
|
||||
// oneDNN doesn't support paddings
|
||||
if (usr->get_preferred_impl_type() == impl_types::onednn)
|
||||
return;
|
||||
}
|
||||
|
||||
if (format == format::bfyx && crop_size.batch[0] == input_layout.batch() &&
|
||||
crop_size.spatial[0] == input_layout.spatial(0) &&
|
||||
crop_size.spatial[1] == input_layout.spatial(1) && out_padd.lower_size().feature[0] == 0 &&
|
||||
out_padd.upper_size().feature[0] == 0 && out_padd.lower_size().batch[0] == 0 &&
|
||||
out_padd.upper_size().batch[0] == 0 && out_padd.lower_size().spatial[0] == 0 &&
|
||||
out_padd.lower_size().spatial[1] == 0 && out_padd.upper_size().spatial[0] == 0 &&
|
||||
out_padd.upper_size().spatial[1] == 0) {
|
||||
// Regular crop
|
||||
// crop input buffer
|
||||
// |___________data____________|
|
||||
//
|
||||
// crop output buffer
|
||||
// |-------->| offsets[f] |<--|
|
||||
// |_____data____|
|
||||
// <------------>
|
||||
// reference size
|
||||
//
|
||||
// In-place crop
|
||||
// crop output buffer
|
||||
// |_low_pad_|__data_size__|___|<-upper pad
|
||||
|
||||
// feature num of pad should be accumulated if dep has been optimized out.
|
||||
// Regular crop
|
||||
// crop input buffer
|
||||
// |___________data____________|
|
||||
//
|
||||
// crop output buffer
|
||||
// |-------->| offsets[f] |<--|
|
||||
// |_____data____|
|
||||
// <------------>
|
||||
// reference size
|
||||
//
|
||||
// In-place crop
|
||||
// crop output buffer
|
||||
// |_low_pad_|__data_size__|___|<-upper pad
|
||||
if (can_crop_be_optimized_along_feature(node)) {
|
||||
auto crop_prim = node.get_primitive();
|
||||
auto opt_lower_pad = crop_prim->offsets.feature[0];
|
||||
auto opt_upper_pad = input_layout.feature() - crop_prim->offsets.feature[0] - crop_size.feature[0];
|
||||
auto& dep = node.get_dependency(0);
|
||||
// feature num of pad should be accumulated if dep has been optimized out.
|
||||
if (dep.is_type<crop>() && dep.can_be_optimized()) {
|
||||
auto dep_pad = dep.get_output_layout().data_padding;
|
||||
OPENVINO_ASSERT(
|
||||
@@ -454,18 +503,36 @@ void prepare_buffer_fusing::run(program& p) {
|
||||
opt_upper_pad += dep_pad.upper_size().feature[0];
|
||||
}
|
||||
|
||||
// set padding
|
||||
node.set_output_padding(
|
||||
padding({out_padd.lower_size().batch[0],
|
||||
opt_lower_pad,
|
||||
out_padd.lower_size().spatial[0],
|
||||
out_padd.lower_size().spatial[1]},
|
||||
{out_padd.upper_size().batch[0],
|
||||
opt_upper_pad,
|
||||
out_padd.upper_size().spatial[0],
|
||||
out_padd.upper_size().spatial[1]}));
|
||||
node.can_be_optimized(true);
|
||||
propagate_padding_to_opt_out_users(node, node.get_output_layout().data_padding);
|
||||
padding({out_pad.lower_size().batch[0],
|
||||
opt_lower_pad,
|
||||
out_pad.lower_size().spatial[0],
|
||||
out_pad.lower_size().spatial[1]},
|
||||
{out_pad.upper_size().batch[0],
|
||||
opt_upper_pad,
|
||||
out_pad.upper_size().spatial[0],
|
||||
out_pad.upper_size().spatial[1]}));
|
||||
} else if (can_crop_be_optimized_along_batch(node)) {
|
||||
auto crop_prim = node.get_primitive();
|
||||
auto opt_lower_pad = crop_prim->offsets.batch[0];
|
||||
auto opt_upper_pad = input_layout.batch() - crop_prim->offsets.batch[0] - crop_size.batch[0];
|
||||
|
||||
auto new_padding = padding({opt_lower_pad,
|
||||
out_pad.lower_size().feature[0],
|
||||
out_pad.lower_size().spatial[0],
|
||||
out_pad.lower_size().spatial[1]},
|
||||
{opt_upper_pad,
|
||||
out_pad.upper_size().feature[0],
|
||||
out_pad.upper_size().spatial[0],
|
||||
out_pad.upper_size().spatial[1]});
|
||||
node.set_output_padding(new_padding);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
node.can_be_optimized(true);
|
||||
propagate_padding_to_opt_out_users(node, node.get_output_layout().data_padding);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -4,12 +4,14 @@
|
||||
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
#include "ngraph/op/gather.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/gather.hpp"
|
||||
#include "intel_gpu/primitives/reorder.hpp"
|
||||
#include "intel_gpu/primitives/reshape.hpp"
|
||||
#include "intel_gpu/primitives/crop.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
namespace ov {
|
||||
@@ -44,12 +46,13 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
|
||||
}
|
||||
|
||||
// Dynamic path will do shape infer internally, so no need to pass valid out shape for that case
|
||||
ov::Shape out_shape = op->get_output_partial_shape(0).is_static() ? op->get_output_shape(0) : ov::Shape{};
|
||||
bool is_static = op->get_output_partial_shape(0).is_static();
|
||||
ov::Shape out_shape = is_static ? op->get_output_shape(0) : ov::Shape{};
|
||||
|
||||
// Update output_shape in case of scalar indice
|
||||
bool need_reshape = false;
|
||||
auto out_shape_original = out_shape;
|
||||
if (!p.use_new_shape_infer() && op->get_output_partial_shape(0).is_static()) {
|
||||
if (!p.use_new_shape_infer() && is_static) {
|
||||
auto input1_shape = op->get_input_shape(1);
|
||||
if (input1_shape.size() == 0 && batch_dim == 0) {
|
||||
need_reshape = true;
|
||||
@@ -77,21 +80,50 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
|
||||
}
|
||||
}
|
||||
|
||||
// gather
|
||||
// Set layer name for Gather
|
||||
auto reshapeName = layerName + "";
|
||||
if (need_reshape) {
|
||||
layerName = layerName + "_reshape_output";
|
||||
}
|
||||
|
||||
auto gatherPrim = cldnn::gather(layerName,
|
||||
reordered_inputs[0],
|
||||
reordered_inputs[1],
|
||||
axis,
|
||||
out_shape,
|
||||
batch_dim,
|
||||
support_neg_ind);
|
||||
// Check if Gather could be converted to other primitive
|
||||
const auto input_shape = op->get_input_partial_shape(0);
|
||||
const auto input_rank = input_shape.rank().get_length();
|
||||
const auto& indices = op->input_value(1);
|
||||
if (is_static && axis == 0 && input_rank > 1 && indices.get_partial_shape().rank().get_length() == 0 &&
|
||||
std::equal(input_shape.begin()+1, input_shape.end(), out_shape.begin()+1)) {
|
||||
// Gather -> Crop
|
||||
// this Gather simply divides an input tensor along Batch axis
|
||||
auto get_crop_layer_name = [&](std::string name, size_t idx)->std::string {
|
||||
return (name + "/crop_" + std::to_string(idx));
|
||||
};
|
||||
|
||||
p.add_primitive(*op, gatherPrim);
|
||||
// Get indices info to calculate offset
|
||||
const auto& indices_node = indices.get_node_shared_ptr();
|
||||
auto indices_constant = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(indices_node);
|
||||
float result = 0.f;
|
||||
ov::op::util::get_single_value(indices_constant, result);
|
||||
|
||||
// Set tensors for crop shape and offset
|
||||
InferenceEngine::SizeVector start_offset(input_shape.size());
|
||||
start_offset[0] = static_cast<size_t>(result);
|
||||
auto offsetTensor = tensor_from_dims(start_offset, 0);
|
||||
auto outTensor = tensor_from_dims(out_shape, 1);
|
||||
|
||||
// Create Crop
|
||||
layerName = get_crop_layer_name(layerName, static_cast<size_t>(result));
|
||||
auto cropPrim = cldnn::crop(layerName, reordered_inputs[0], outTensor, offsetTensor);
|
||||
p.add_primitive(*op, cropPrim);
|
||||
} else {
|
||||
auto gatherPrim = cldnn::gather(layerName,
|
||||
reordered_inputs[0],
|
||||
reordered_inputs[1],
|
||||
axis,
|
||||
out_shape,
|
||||
batch_dim,
|
||||
support_neg_ind);
|
||||
p.add_primitive(*op, gatherPrim);
|
||||
}
|
||||
|
||||
// Add reorder and reshape for scalar indice
|
||||
if (need_reshape) {
|
||||
|
||||
@@ -559,4 +559,23 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
GatherLayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
const auto GatherAxes0Optimized = []() {
|
||||
return testing::Combine(testing::ValuesIn({std::vector<size_t>{4, 8, 2, 2}}),
|
||||
testing::ValuesIn({std::vector<size_t>{}}),
|
||||
testing::ValuesIn({std::tuple<int, int>{0, 0}}),
|
||||
testing::ValuesIn(netPrecisionsFP32),
|
||||
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
testing::Values(InferenceEngine::Layout::ANY),
|
||||
testing::Values(InferenceEngine::Layout::ANY),
|
||||
testing::Values(CommonTestUtils::DEVICE_GPU));
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_Gather7Axes0Optimized,
|
||||
Gather8IndiceScalarLayerTest,
|
||||
GatherAxes0Optimized(),
|
||||
Gather8IndiceScalarLayerTest::getTestCaseName
|
||||
);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
|
||||
#include "intel_gpu/graph/program.hpp"
|
||||
#include "data_inst.h"
|
||||
#include "crop_inst.h"
|
||||
#include "reshape_inst.h"
|
||||
#include "fully_connected_inst.h"
|
||||
#include "permute_inst.h"
|
||||
@@ -165,7 +166,7 @@ TEST(prepare_buffer_fusing, in_place_concat_static) {
|
||||
topology.add(concatenation("concat", { input_info("permute1"), input_info("permute2") }, 2));
|
||||
topology.add(permute("output", input_info("concat"), {0, 2, 3, 1}));
|
||||
|
||||
ExecutionConfig config;
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto prog = program::build_program(engine, topology, config, false, false);
|
||||
ASSERT_NE(prog, nullptr);
|
||||
@@ -218,7 +219,7 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic) {
|
||||
topology.add(concatenation("concat", { input_info("permute1"), input_info("permute2") }, 2));
|
||||
topology.add(permute("output", input_info("concat"), {0, 2, 3, 1}));
|
||||
|
||||
ExecutionConfig config;
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||
auto prog = program::build_program(engine, topology, config, false, false);
|
||||
@@ -273,7 +274,7 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic__static_dim_dyn_pad) {
|
||||
topology.add(concatenation("concat", { input_info("permute1"), input_info("permute2") }, 2));
|
||||
topology.add(permute("output", input_info("concat"), {0, 2, 3, 1}));
|
||||
|
||||
ExecutionConfig config;
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
|
||||
auto prog = program::build_program(engine, topology, config, false, false);
|
||||
@@ -311,3 +312,54 @@ TEST(prepare_buffer_fusing, in_place_concat_dynamic__static_dim_dyn_pad) {
|
||||
ASSERT_EQ(ref_output[x], output_ptr[x]);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds Input -> crop -> reorder(i8) with optimize_data enabled, where the crop keeps a
// {1, 2, 2, 2} region at offset (1, 0, 0, 0) of a {3, 2, 2, 2} bfyx input.
// Verifies that the crop primitive reports can_be_optimized() and that the reorder output
// holds the second group of eight input values.
TEST(prepare_buffer_fusing, crop_b_axis) {
    auto& engine = get_test_engine();

    auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 3, 2, 2, 2 } });

    // One row per group of eight values; the middle row ends in 11 (not 8), which the
    // expected output below reproduces.
    set_values(input_mem, {
        1.f, 2.f,  3.f,  4.f,  1.f, 2.f,  3.f,  4.f,
        5.f, 6.f,  7.f,  8.f,  5.f, 6.f,  7.f,  11.f,
        9.f, 10.f, 11.f, 12.f, 9.f, 10.f, 11.f, 12.f
    });

    topology topo;
    topo.add(input_layout("Input", input_mem->get_layout()));
    topo.add(crop("crop", input_info("Input"), tensor{1, 2, 2, 2}, tensor(1, 0, 0, 0)));
    topo.add(reorder("reorder", input_info("crop"), format::bfyx, data_types::i8));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);

    net.set_input_data("Input", input_mem);

    auto outputs = net.execute();

    // The crop must have been optimized out.
    auto crop_inst = net.get_primitive("crop");
    ASSERT_EQ(crop_inst->can_be_optimized(), true);

    auto out_mem = outputs.at("reorder").get_memory();
    cldnn::mem_lock<int8_t> out_data(out_mem, get_test_stream());

    const std::vector<int8_t> expected = {
        5, 6, 7, 8, 5, 6, 7, 11
    };

    // Extents of the cropped output.
    const int out_batches = 1;
    const int out_features = 2;
    const int out_height = 2;
    const int out_width = 2;
    for (int b = 0; b < out_batches; ++b) {
        for (int f = 0; f < out_features; ++f) {
            for (int y = 0; y < out_height; ++y) {
                for (int x = 0; x < out_width; ++x) {
                    const int expected_idx = x + 2 * (y + 2 * f);
                    const int output_idx = x + out_width * (y + out_height * (f + out_features * b));
                    ASSERT_EQ(out_data[output_idx], expected[expected_idx]);
                }
            }
        }
    }
}
|
||||
|
||||
@@ -61,4 +61,13 @@ INSTANTIATE_TEST_SUITE_P(smoke, gather_test,
|
||||
},
|
||||
}));
|
||||
|
||||
// Shape-inference case for a Gather that picks a single slice along axis 0.
// NOTE(review): the fields are presumably (data layout, indices layout, axis, batch_dim,
// expected output layout) - confirm against the gather_test_params definition.
INSTANTIATE_TEST_SUITE_P(optimized, gather_test,
    testing::ValuesIn(std::vector<gather_test_params>{
        {
            layout{ov::PartialShape{3, 4, 2, 2}, data_types::f32, format::bfyx},
            layout{ov::PartialShape{1}, data_types::f32, format::bfyx},
            0,
            0,
            layout{ov::PartialShape{1, 4, 2, 2}, data_types::f32, format::bfyx}
        },
    }));
|
||||
|
||||
} // shape_infer_tests
|
||||
|
||||
@@ -1576,3 +1576,54 @@ TEST(crop_gpu, optimized_out_crop) {
|
||||
ASSERT_TRUE(all_primitives["crop1"] == "_optimized_");
|
||||
ASSERT_TRUE(all_primitives["crop2"] == "_optimized_");
|
||||
}
|
||||
|
||||
// Builds Input -> crop -> reorder(i8) with optimize_data enabled, where the crop keeps a
// {1, 2, 1, 2} region at offset (1, 0, 0, 0) of a {3, 2, 1, 2} bfyx input.
// Verifies the reorder output equals the second group of four input values and that the
// crop primitive reports can_be_optimized().
TEST(crop_single_axis, simple_Baxis) {
    auto& engine = get_test_engine();

    auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 3, 2, 1, 2 } });

    // One row per group of four values.
    set_values(input_mem, {
        1.f, 2.f,  3.f,  4.f,
        5.f, 6.f,  7.f,  8.f,
        9.f, 10.f, 11.f, 12.f
    });

    topology topo;
    topo.add(input_layout("Input", input_mem->get_layout()));
    topo.add(crop("crop", input_info("Input"), tensor{1, 2, 1, 2}, tensor(1, 0, 0, 0)));
    topo.add(reorder("reorder", input_info("crop"), format::bfyx, data_types::i8));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);

    net.set_input_data("Input", input_mem);

    auto outputs = net.execute();
    auto out_mem = outputs.at("reorder").get_memory();
    cldnn::mem_lock<int8_t> out_data(out_mem, get_test_stream());

    const std::vector<int8_t> expected = {
        5, 6, 7, 8
    };

    // Extents of the cropped output.
    const int out_batches = 1;
    const int out_features = 2;
    const int out_height = 2;
    const int out_width = 1;
    for (int b = 0; b < out_batches; ++b) {
        for (int f = 0; f < out_features; ++f) {
            for (int y = 0; y < out_height; ++y) {
                for (int x = 0; x < out_width; ++x) {
                    const int expected_idx = x + y + 2 * f;
                    const int output_idx = x + out_width * (y + out_height * (f + out_features * b));
                    ASSERT_EQ(out_data[output_idx], expected[expected_idx]);
                }
            }
        }
    }

    // The crop must have been optimized out.
    auto crop_inst = net.get_primitive("crop");
    ASSERT_EQ(crop_inst->can_be_optimized(), true);
}
|
||||
|
||||
@@ -2097,3 +2097,64 @@ TEST(gather_gpu_u8, 322_axisF) {
|
||||
TEST(gather_gpu_u8, export_import) {
|
||||
test_gather_gpu_u8_322_axisF<uint8_t>(true);
|
||||
}
|
||||
|
||||
// Builds InputDictionary/InputText -> gather(axis 0) -> reorder(i8) with optimize_data enabled.
// A single index with value 1 is gathered from a {3, 2, 1, 2} bfyx dictionary.
// Verifies the reorder output equals the second group of four dictionary values and that
// the gather primitive itself does not report can_be_optimized().
TEST(gather_single_axis, simple_Baxis) {
    auto& engine = get_test_engine();

    auto dictionary_mem = engine.allocate_memory({ data_types::f32, format::bfyx, tensor{ 3, 2, 1, 2 } }); // Dictionary
    auto indices_mem = engine.allocate_memory({ data_types::i32, format::bfyx, tensor{ 1 } }); // Indexes
    int64_t axis = 0;

    // One row per group of four values.
    set_values(dictionary_mem, {
        1.f, 2.f,  3.f,  4.f,
        5.f, 6.f,  7.f,  8.f,
        9.f, 10.f, 11.f, 12.f
    });

    set_values(indices_mem, {
        1
    });

    topology topo;
    topo.add(input_layout("InputDictionary", dictionary_mem->get_layout()));
    topo.add(input_layout("InputText", indices_mem->get_layout()));
    topo.add(
        gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 2, 2, 1})
    );
    topo.add(reorder("reorder", input_info("gather"), format::bfyx, data_types::i8));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    network net(engine, topo, cfg);

    net.set_input_data("InputDictionary", dictionary_mem);
    net.set_input_data("InputText", indices_mem);

    auto outputs = net.execute();

    auto out_mem = outputs.at("reorder").get_memory();
    cldnn::mem_lock<int8_t> out_data(out_mem, get_test_stream());

    const std::vector<int8_t> expected = {
        5, 6, 7, 8
    };

    // Extents of the gathered output.
    const int out_batches = 1;
    const int out_features = 2;
    const int out_height = 2;
    const int out_width = 1;
    for (int b = 0; b < out_batches; ++b) {
        for (int f = 0; f < out_features; ++f) {
            for (int y = 0; y < out_height; ++y) {
                for (int x = 0; x < out_width; ++x) {
                    const int expected_idx = x + y + 2 * f;
                    const int output_idx = x + out_width * (y + out_height * (f + out_features * b));
                    ASSERT_EQ(out_data[output_idx], expected[expected_idx]);
                }
            }
        }
    }

    // A gather primitive added directly to the topology is expected to stay non-optimized.
    auto gather_inst = net.get_primitive("gather");
    ASSERT_EQ(gather_inst->can_be_optimized(), false);
}
|
||||
|
||||
Reference in New Issue
Block a user