[GPU] Don't reorder weights when can reinterpret (#16714)

* [GPU] Don't reorder weights when can reinterpret
* [GPU] Test fixes

parent 4098434233
commit f5e199c494

@@ -324,17 +324,14 @@ void prepare_buffer_fusing::run(program& p) {
        If crop is before concat there can be padding mismtach, since concat changes padding.
     */
     auto can_optimize = [](const program_node* node) {
-        bool is_dynamic = node->get_output_layout().is_dynamic();
+        bool is_dynamic = node->is_dynamic();
         bool is_planar = format::is_default_format(node->get_output_layout().format);
         bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layouts()[0].data_padding;
-        // The condition below check only output layout as cases like
-        // (dyn_shape) -> reshape -> (static_shape) -> some_static_primitive
-        // may have invalid set_arguments call as output memory of reshape won't be available until reshape primitive is executed
         if (node->is_type<reshape>() && is_dynamic && is_planar && no_pad && !node->is_output() && !node->has_fused_primitives()) {
             return true;
         }
 
-        if (node->is_dynamic() || node->is_output() || node->has_fused_primitives()) {
+        if (is_dynamic || node->is_output() || node->has_fused_primitives()) {
             return false;
         }
         return true;

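For context, a sketch rather than anything in the patch itself: the relaxed `can_optimize` lambda lets buffer fusing fold away a dynamic, planar, unpadded reshape even when its consumer is static; the updated `prepare_buffer_fusing` test and the new reshape test at the end of this diff cover exactly that pattern. The snippet below reuses the primitives from those tests (test scaffolding and includes omitted):

```cpp
// Sketch of the (dynamic) -> reshape -> (static) -> consumer pattern that
// can_optimize now accepts; names mirror the new unit test further down.
topology topology;
topology.add(input_layout("input", layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}));
// Planar, unpadded reshape with a dynamic input: after prepare_buffer_fusing it is
// optimized out, so its output memory only becomes a view of its input at execution time.
topology.add(reshape("reshape", input_info("input"), false, {2, 10}, {2, 10}));
// Static consumer whose kernel arguments can only be set once that view exists
// (handled by the network::set_arguments() change below).
topology.add(reduce("reduce", input_info("reshape"), reduce_mode::max, {1}, true));
```
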
@@ -642,9 +642,23 @@ void network::set_arguments() {
         return;
 
     for (auto const& prim : _exec_order) {
-        if (!prim->is_dynamic())
+        if (!prim->is_dynamic()) {
+            bool can_set_args = true;
+            for (auto& dep : prim->dependencies()) {
+                // Skip set args for nodes with dynamic & optimized_out dependency
+                // This is needed to handle dynamic -> static cases like
+                // (dynamic) -> reshape -> (static) -> some_op
+                // In that case some_op is static and we may want to set arguments once,
+                // but dynamic optimized out reshape means that output buffer of reshape is unavailable
+                // and attempt to set args will fail.
+                if (dep.first->can_be_optimized() && dep.first->is_dynamic())
+                    can_set_args = false;
+            }
+
+            if (can_set_args)
                 prim->set_arguments();
+        }
     }
     _reset_arguments = false;
 }

@@ -1308,7 +1322,7 @@ void network::allocate_primitive_instance(program_node const& node) {
             return true;
         }
         if (dep.first->can_be_optimized()) {
-            if (is_mutable_input(*dep.first)) {
+            if (is_mutable_input(*dep.first) || dep.first->is_dynamic()) {
                 return true;
             }
         }

@@ -395,11 +395,13 @@ bool primitive_inst::update_impl() {
                 impl->set_kernels(kernels);
                 cache.add(updated_params, impl->clone());
             });
+        if (!can_be_optimized()) {
             _impl = _dynamic_impl->clone();
             auto new_impl_params = _impl->canonicalize_shapes(*_impl_params);
             _impl->update_dispatch_data(new_impl_params);
 
             update_shape_info(new_impl_params);
+        }
     } else {
         _impl = _node->type()->choose_impl(*_node, updated_params);
         auto& kernels_cache = get_network().get_program()->get_kernels_cache();

@@ -715,22 +717,35 @@ event::ptr primitive_inst::update_weights() {
     if (!weightable_node)
         return nullptr;
 
+    auto& engine = _network.get_engine();
     auto& weights_params = _impl->_weights_reorder_params;
-    bool requires_reorder = weights_params.engine != kernel_selector::GenericKernelParams::Engine::NONE;
 
-    const auto weights_idx = _node->get_primitive()->input.size();
+    auto weights_idx = _node->get_primitive()->input.size();
-    const auto original_weights_memory = dep_memory_ptr(weights_idx);
+    auto original_weights_memory = dep_memory_ptr(weights_idx);
-    auto expected_layout = requires_reorder ? from_weights_tensor(weights_params.dest)
-                                            : original_weights_memory->get_layout();
+    auto original_layout = original_weights_memory->get_layout();
 
+    if (weights_params.engine == kernel_selector::GenericKernelParams::Engine::NONE) {
+        // If kernel doesn't says that it doesn't require weights reorder, but weights were reordered previously, then
+        // incorrect memory buffer may be assigned, so reset cached weights for such case
+        _reordered_weights_cache.add(original_weights_memory->get_layout(), original_weights_memory);
+    } else {
+        auto expected_layout = from_weights_tensor(weights_params.dest);
         // Set original patrial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion
         expected_layout.set_partial_shape(original_weights_memory->get_layout().get_partial_shape());
+        _impl_params->weights_layout = optional_layout(expected_layout);
+
-    if (requires_reorder && !_reordered_weights_cache.has(expected_layout)) {
+        if (_reordered_weights_cache.has(expected_layout)) {
+            GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(true);
+            GPU_DEBUG_TRACE_DETAIL << id() << ": reuse weights for " << expected_layout.to_short_string() << std::endl;
+            return nullptr;
+        } else if (original_layout.compatible(expected_layout)) {
+            GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(true);
+            GPU_DEBUG_TRACE_DETAIL << id() << ": reinterpret original weights memory from " << original_layout.to_short_string()
+                                   << " to " << expected_layout.to_short_string() << std::endl;
+            _reordered_weights_cache.add(expected_layout, engine.reinterpret_buffer(*original_weights_memory, expected_layout));
+            return nullptr;
+        } else {
             GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(false);
-        auto original_layout = original_weights_memory->get_layout();
-        auto& engine = _network.get_engine();
 
             auto get_kernel_key = [&]() -> size_t {
                 auto seed = _node->get_primitive()->hash();
                 seed = hash_combine(seed, expected_layout.hash());

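The effect of this block, condensed into a sketch (not the literal control flow; the concrete layouts are borrowed from the layout tests below, where bfyx and oiyx with the same dimensions are expected to be compatible): a cached reorder is reused if present, a compatible original buffer is merely reinterpreted instead of reordered, and only the remaining cases fall through to the reorder kernel.

```cpp
// Decision order introduced above (sketch):
//   1) expected layout already in the LRU cache -> reuse the cached memory
//   2) original layout compatible with expected -> zero-copy reinterpret, no kernel launch
//   3) otherwise                                -> build/run a weights reorder kernel
layout original{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx};
layout expected{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::oiyx};
if (original.compatible(expected)) {
    // Same element count and blocking, so the existing allocation is just re-labelled;
    // engine and original_weights_memory as in update_weights() above.
    auto view = engine.reinterpret_buffer(*original_weights_memory, expected);
    _reordered_weights_cache.add(expected, view);
}
```
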
@@ -766,7 +781,7 @@ event::ptr primitive_inst::update_weights() {
             }
 
             if (can_reuse) {
-                GPU_DEBUG_TRACE_DETAIL << id() << ": reuse weights memory" << std::endl;
+                GPU_DEBUG_TRACE_DETAIL << id() << ": reuse weights memory for new layout " << expected_layout.to_short_string() << std::endl;
                 weights_memory = engine.reinterpret_buffer(*weights_memory, expected_layout);
             } else {
                 GPU_DEBUG_TRACE_DETAIL << id() << ": allocate weights memory" << std::endl;

@@ -775,7 +790,6 @@ event::ptr primitive_inst::update_weights() {
             }
 
             _reordered_weights_cache.add(expected_layout, weights_memory);
-            _impl_params->weights_layout = optional_layout(expected_layout);
             GPU_DEBUG_TRACE_DETAIL << id() << ": update weights cache: " << expected_layout.to_short_string() << " cache_size="
                                    << _reordered_weights_cache.size() << "/" << _reordered_weights_cache.capacity() << std::endl;
 

@@ -791,14 +805,9 @@ event::ptr primitive_inst::update_weights() {
             }
 
             return ev;
-    } else {
-        // If kernel doesn't says that it doesn't require weights reorder, but weights were reordered previously, then
-        // incorrect memory buffer may be assigned, so push front original memory in LRU cache
-        if (weights_params.engine == kernel_selector::GenericKernelParams::Engine::NONE) {
-            _reordered_weights_cache.add(expected_layout, original_weights_memory);
-            _impl_params->weights_layout = optional_layout(expected_layout);
-        }
         }
     }
 
     GPU_DEBUG_PROFILED_STAGE_CACHE_HIT(true);
 
     return nullptr;

@@ -893,6 +893,10 @@ EinsumDecomposition::EinsumDecomposition() {
             return false;
         }
 
+        if (einsum_node->is_dynamic()) {
+            return false;
+        }
+
         auto equation = einsum_node->get_equation();
         std::vector<std::string> input_subscripts;
         std::string output_subscript;

@@ -11,86 +11,6 @@
 namespace cldnn {
 static inline bool check_redundant_1d_along_feature(layout const& l1, layout const& l2);
 namespace {
-// pair.first tells whether l1 and l2 are absolutely identical
-// pair.second tells whether l1 and l2 can be reinterpreted to each other without need of reordering
-// note: layouts can only be considered identical if data size described by both layouts match (so no data are genereted
-// nor dropped) note: if layouts describe two buffers with different size, consider them not to be identical even if
-// smaller buffer can be considered to hold subsequence of larger buffer,
-// this behavior is required to force buffer allocation for smaller buffer which, currently, should always be
-// performed
-std::pair<bool, bool> are_layouts_identical(layout const& l1, layout const& l2) {
-    const auto& l1_pad = l1.data_padding;
-    const auto& l2_pad = l2.data_padding;
-
-    if (l1.is_dynamic() || l2.is_dynamic())
-        return {false, false};
-
-    auto l1_size = l1.get_tensor();
-    auto l2_size = l2.get_tensor();
-    if (l1 == l2)
-        return {true, true};
-    if (check_redundant_1d_along_feature(l1, l2))
-        return {false, true};
-    if (l1.data_type != l2.data_type)
-        return {false, false};
-    // Reorders between bfyx, bfzyx, bfwzyx can pe reinterpeted as reshape when
-    // there is no padding and both hold same number of elements.
-    if (format::is_default_format(l1.format) && format::is_default_format(l2.format) &&
-        !l1_pad && !l2_pad && l1.get_linear_size() == l2.get_linear_size())
-        return {false, true};
-    if (l1_size != l2_size)
-        return {false, false};
-    if (l1.get_linear_size() != l2.get_linear_size())
-        return {false, false};
-
-    auto check_format = [&l1, &l2](cldnn::format format) {
-        return (l1.format == format && l2.format != format) ||
-               (l2.format == format && l1.format != format);
-    };
-
-    if (check_format(format::b_fs_yx_fsv2) ||
-        check_format(format::b_fs_yx_fsv4) ||
-        check_format(format::fs_b_yx_fsv32) ||
-        check_format(format::b_fs_yx_fsv16) ||
-        check_format(format::b_fs_yx_fsv32) ||
-        check_format(format::b_fs_zyx_fsv2) ||
-        check_format(format::b_fs_zyx_fsv4) ||
-        check_format(format::b_fs_zyx_fsv32) ||
-        check_format(format::b_fs_zyx_fsv16) ||
-        check_format(format::bs_fs_yx_bsv4_fsv4) ||
-        check_format(format::bs_fs_yx_bsv8_fsv4) ||
-        check_format(format::bs_fs_zyx_bsv8_fsv4) ||
-        check_format(format::bs_fs_yx_bsv8_fsv2) ||
-        check_format(format::bs_fs_zyx_bsv8_fsv2) ||
-        check_format(format::bs_fs_yx_bsv4_fsv2) ||
-        check_format(format::bs_fs_yx_bsv32_fsv16) ||
-        check_format(format::bs_fs_yx_bsv32_fsv32) ||
-        check_format(format::bs_fs_yx_bsv16_fsv16) ||
-        check_format(format::bs_fs_yx_bsv16_fsv32) ||
-        check_format(format::bs_fs_zyx_bsv16_fsv32) ||
-        check_format(format::bs_fs_zyx_bsv16_fsv16) ||
-        check_format(format::bs_fs_zyx_bsv32_fsv16) ||
-        check_format(format::bs_fs_zyx_bsv32_fsv32))
-        return {false, false};
-
-    auto l1_pitch = l1.get_pitches();
-    auto l2_pitch = l2.get_pitches();
-
-    // ignore pitches which will never be used (for dims with size == 1)
-    for (size_t i = 0; i < tensor_dim_max; ++i)
-        if (l1_size.raw[i] == 1)
-            l1_pitch.raw[i] = 0;
-    for (size_t i = 0; i < tensor_dim_max; ++i)
-        if (l2_size.raw[i] == 1)
-            l2_pitch.raw[i] = 0;
-
-    auto l1_offset = l1.get_linear_offset();
-    auto l2_offset = l2.get_linear_offset();
-    if (l1_pitch == l2_pitch && l1_offset == l2_offset)
-        return {false, true};
-
-    return {false, false};
-}
-
 std::vector<cldnn::tensor::value_type> convert_dimensions(const std::vector<cldnn::tensor::value_type>& sizes, std::string in_order, std::string out_order) {
     std::vector<cldnn::tensor::value_type> new_sizes(out_order.size(), {-1});

@@ -497,12 +417,100 @@ layout layout::with_padding(padding const& padd) const {
     return ret;
 }
 
+// tells whether l1 and l2 can be reinterpreted to each other without need of reordering
+// note: layouts can only be considered identical if data size described by both layouts match (so no data are genereted
+// nor dropped) note: if layouts describe two buffers with different size, consider them not to be identical even if
+// smaller buffer can be considered to hold subsequence of larger buffer,
+// this behavior is required to force buffer allocation for smaller buffer which, currently, should always be
+// performed
 bool layout::compatible(const layout& other) const {
-    return are_layouts_identical(*this, other).second;
+    auto& l1 = *this;
+    auto& l2 = other;
+    const auto& l1_pad = l1.data_padding;
+    const auto& l2_pad = l2.data_padding;
+
+    if (l1.is_dynamic() || l2.is_dynamic())
+        return false;
+
+    auto l1_size = l1.get_tensor();
+    auto l2_size = l2.get_tensor();
+    if (l1 == l2)
+        return true;
+    if (check_redundant_1d_along_feature(l1, l2))
+        return true;
+    if (l1.data_type != l2.data_type)
+        return false;
+    // Reorders between bfyx, bfzyx, bfwzyx can be reinterpeted as reshape when
+    // there is no padding and both hold same number of elements.
+    if (format::is_default_format(l1.format) && format::is_default_format(l2.format) &&
+        !l1_pad && !l2_pad && l1.get_linear_size() == l2.get_linear_size())
+        return true;
+    if (l1_size != l2_size)
+        return false;
+    if (l1.get_linear_size() != l2.get_linear_size())
+        return false;
+
+    auto check_format = [&l1, &l2](cldnn::format format) {
+        return (l1.format == format && l2.format != format) ||
+               (l2.format == format && l1.format != format);
+    };
+
+    const auto& blocks1 = format::block_sizes(l1.format);
+    const auto& blocks2 = format::block_sizes(l2.format);
+
+    // TODO: Relax restrictions below
+    if (blocks1 != blocks2 ||
+        (!blocks1.empty() && format::traits(l1.format)._order != format::traits(l2.format)._order))
+        return false;
+
+    if (check_format(format::b_fs_yx_fsv2) ||
+        check_format(format::b_fs_yx_fsv4) ||
+        check_format(format::fs_b_yx_fsv32) ||
+        check_format(format::b_fs_yx_fsv16) ||
+        check_format(format::b_fs_yx_fsv32) ||
+        check_format(format::b_fs_zyx_fsv2) ||
+        check_format(format::b_fs_zyx_fsv4) ||
+        check_format(format::b_fs_zyx_fsv32) ||
+        check_format(format::b_fs_zyx_fsv16) ||
+        check_format(format::bs_fs_yx_bsv4_fsv4) ||
+        check_format(format::bs_fs_yx_bsv8_fsv4) ||
+        check_format(format::bs_fs_zyx_bsv8_fsv4) ||
+        check_format(format::bs_fs_yx_bsv8_fsv2) ||
+        check_format(format::bs_fs_zyx_bsv8_fsv2) ||
+        check_format(format::bs_fs_yx_bsv4_fsv2) ||
+        check_format(format::bs_fs_yx_bsv32_fsv16) ||
+        check_format(format::bs_fs_yx_bsv32_fsv32) ||
+        check_format(format::bs_fs_yx_bsv16_fsv16) ||
+        check_format(format::bs_fs_yx_bsv16_fsv32) ||
+        check_format(format::bs_fs_zyx_bsv16_fsv32) ||
+        check_format(format::bs_fs_zyx_bsv16_fsv16) ||
+        check_format(format::bs_fs_zyx_bsv32_fsv16) ||
+        check_format(format::bs_fs_zyx_bsv32_fsv32))
+        return false;
+
+    auto l1_pitch = l1.get_pitches();
+    auto l2_pitch = l2.get_pitches();
+
+    // ignore pitches which will never be used (for dims with size == 1)
+    for (size_t i = 0; i < tensor_dim_max; ++i)
+        if (l1_size.raw[i] == 1)
+            l1_pitch.raw[i] = 0;
+    for (size_t i = 0; i < tensor_dim_max; ++i)
+        if (l2_size.raw[i] == 1)
+            l2_pitch.raw[i] = 0;
+
+    auto l1_offset = l1.get_linear_offset();
+    auto l2_offset = l2.get_linear_offset();
+    if (l1_pitch == l2_pitch && l1_offset == l2_offset)
+        return true;
+
+    return false;
 }
 
 bool layout::identical(const layout& other) const {
-    return are_layouts_identical(*this, other).first;
+    if (is_dynamic() || other.is_dynamic())
+        return false;
+    return *this == other;
 }
 
 ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format old_fmt, cldnn::format new_fmt) {

@@ -196,8 +196,8 @@ class layout_cmp_test : public testing::TestWithParam<layouts_cmp_test_params> {
 TEST_P(layout_cmp_test, basic) {
     auto p = GetParam();
 
-    EXPECT_EQ(p.l1.identical(p.l2), p.is_identical);
+    EXPECT_EQ(p.l1.identical(p.l2), p.is_identical) << p.l1.to_short_string() << " -> " << p.l2.to_short_string();
-    EXPECT_EQ(p.l1.compatible(p.l2), p.is_compatible);
+    EXPECT_EQ(p.l1.compatible(p.l2), p.is_compatible) << p.l1.to_short_string() << " -> " << p.l2.to_short_string();
 }
 
 INSTANTIATE_TEST_SUITE_P(smoke, layout_cmp_test,

@@ -209,11 +209,35 @@ INSTANTIATE_TEST_SUITE_P(smoke, layout_cmp_test,
         {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f32, format::bfyx},
          layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx}, false, false},
         {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
-         layout{ov::PartialShape{1, 2, 3, 4, 1}, data_types::f16, format::bfzyx}, false, true},
+         layout{ov::PartialShape{1, 2, 1, 3, 4}, data_types::f16, format::bfzyx}, false, true},
         {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
          layout{ov::PartialShape{1, 2, 3, 4, 1, 1}, data_types::f16, format::bfwzyx}, false, true},
+        {layout{ov::PartialShape{1, 2, 3, 4, 1, 1}, data_types::f16, format::bfwzyx},
+         layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx}, false, true},
+        {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{1, 2, 1, 1, 3, 4}, data_types::f16, format::bfwzyx}, false, true},
         {layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 1, 1}, 0)},
          layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 0, 0}, 0)}, false, false},
         {layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 1, 1}, 0)},
          layout{ov::PartialShape{1, 32, 4, 4}, data_types::f32, format::b_fs_yx_fsv32, padding({0, 0, 1, 1}, 0)}, true, true},
+        {layout{ov::PartialShape{10, 20}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{10, 20}, data_types::f16, format::os_iyx_osv16}, false, false},
+        {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::oiyx}, false, true},
+        {layout{ov::PartialShape{128, 10}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{128, 10}, data_types::f16, format::os_iyx_osv32}, false, false},
+        {layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::yxfb}, false, false},
+        {layout{ov::PartialShape{1, 2, 1, 1}, data_types::f16, format::bfyx},
+         layout{ov::PartialShape{1, 2, 1, 1}, data_types::f16, format::b_fs_yx_fsv16}, false, false},
+        {layout{ov::PartialShape{1, 2, 1, 1, 1}, data_types::f16, format::b_fs_zyx_fsv16},
+         layout{ov::PartialShape{1, 2, 1, 1}, data_types::f16, format::b_fs_yx_fsv16}, false, false},
+        {layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::os_is_zyx_isv16_osv16},
+         layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::is_os_zyx_isv16_osv16}, false, false},
+        {layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::g_os_yx_is_osv8_isv2},
+         layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::g_os_y_is_x_osv8_isv2}, false, false},
+        {layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::goiyx},
+         layout{ov::PartialShape{4, 2, 3, 4, 5}, data_types::f16, format::gioyx}, false, false},
+        {layout{ov::PartialShape{9, 17, 3, 2, 5}, data_types::f16, format::is_os_zyx_isa8_osv8_isv2},
+         layout{ov::PartialShape{9, 17, 3, 2, 5}, data_types::f16, format::os_is_zyx_isa8_osv8_isv2}, false, false},
     }));

@@ -85,7 +85,7 @@ TEST(prepare_buffer_fusing, static_node_after_optimized_out_dyn_reshape) {
     program_wrapper::apply_opt_pass<prepare_buffer_fusing>(*prog);
     program_wrapper::apply_opt_pass<compile_graph>(*prog);
     ASSERT_NO_THROW(prog->get_node("reshape"));
-    ASSERT_FALSE(prog->get_node("reshape").can_be_optimized());
+    ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());
     program_wrapper::apply_opt_pass<build_implementations>(*prog);
 
     ASSERT_TRUE(has_node_with_type<reshape>(*prog));

@@ -11,6 +11,8 @@
 #include <intel_gpu/primitives/input_layout.hpp>
 #include <intel_gpu/primitives/eltwise.hpp>
 
+#include "reshape_inst.h"
+
 using namespace cldnn;
 using namespace ::tests;
 using namespace testing;

@@ -942,6 +944,54 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) {
     }
 }
 
+TEST(reshape_gpu_f32, basic_dynamic_shape_to_static_optimized_out) {
+    auto& engine = get_test_engine();
+
+    auto input = engine.allocate_memory(layout{ov::PartialShape{2, 10}, data_types::f32, format::bfyx});
+    topology topology;
+    topology.add(input_layout("input", layout{ov::PartialShape::dynamic(2), data_types::f32, format::bfyx}));
+    topology.add(reshape("reshape", input_info("input"), false, {2, 10}, {2, 10}));
+    topology.add(reduce("reduce", input_info("reshape"), reduce_mode::max, {1}, true));
+
+    // clang-format off
+    std::vector<float> input_data = {
+        0.0, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f,
+        0.0, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f,
+    };
+    // clang-format on
+
+    set_values(input, input_data);
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    network network(engine, topology, config);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    ASSERT_TRUE(network.get_primitive("reshape")->can_be_optimized());
+
+    ASSERT_EQ(outputs.size(), size_t(1));
+    ASSERT_EQ(outputs.begin()->first, "reduce");
+
+    auto output = outputs.at("reduce").get_memory();
+
+    ASSERT_EQ(output->get_layout().data_type, input->get_layout().data_type);
+    ASSERT_EQ(output->get_layout().format, format::bfyx);
+    ASSERT_TRUE(output->get_layout().is_static());
+    ov::PartialShape expected_shape = {2, 1};
+    ASSERT_EQ(output->get_layout().get_partial_shape(), expected_shape);
+
+    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
+    std::vector<float> expected_res = {9.f, 9.f};
+    ASSERT_EQ(output_ptr.size(), expected_res.size());
+
+    for (size_t i = 0; i < expected_res.size(); i++) {
+        ASSERT_EQ(expected_res[i], output_ptr[i]);
+    }
+}
+
 #ifdef RUN_ALL_MODEL_CACHING_TESTS
 TEST(reshape_gpu_f32, basic_2dim_in_place_cached) {
     generic_reshape_test<float>(