[GPU] Reduce host time overhead in ReadValue execution for stateful model (#21521)
This commit is contained in:
parent
3fb60dc41c
commit
71e7015d39
@ -14,6 +14,7 @@
|
|||||||
#include "resample_inst.h"
|
#include "resample_inst.h"
|
||||||
#include "loop_inst.h"
|
#include "loop_inst.h"
|
||||||
#include "strided_slice_inst.h"
|
#include "strided_slice_inst.h"
|
||||||
|
#include "shape_of_inst.h"
|
||||||
#include "non_max_suppression_inst.h"
|
#include "non_max_suppression_inst.h"
|
||||||
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
|
#include "experimental_detectron_roi_feature_extractor_inst.hpp"
|
||||||
#include "border_inst.h"
|
#include "border_inst.h"
|
||||||
@ -407,6 +408,19 @@ static bool can_crop_be_optimized_along_batch(const crop_node& node) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool can_read_value_be_optimize(const read_value_node& node) {
|
||||||
|
if (node.get_users().size() == 1)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
const auto non_shape_of_users_count = std::count_if(node.get_users().begin(), node.get_users().end(), [](const program_node* user) {
|
||||||
|
return !user->is_type<shape_of>();
|
||||||
|
});
|
||||||
|
if (non_shape_of_users_count <= 1)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static void propagate_padding_to_opt_out_users(program_node& node, cldnn::padding padding_data) {
|
static void propagate_padding_to_opt_out_users(program_node& node, cldnn::padding padding_data) {
|
||||||
if (padding_data == cldnn::padding())
|
if (padding_data == cldnn::padding())
|
||||||
return;
|
return;
|
||||||
@ -632,10 +646,10 @@ void prepare_buffer_fusing::run(program& p) {
|
|||||||
// ┌────┴──────┐
|
// ┌────┴──────┐
|
||||||
// │ Result │
|
// │ Result │
|
||||||
// └───────────┘
|
// └───────────┘
|
||||||
// If read_value here returns virable memory w/o copy, then based on Add-s and Assign execution order we may have different results
|
// If read_value here returns variable memory w/o copy, then based on Add-s and Assign execution order we may have different results
|
||||||
// TODO: Allow optimizations for the case above too. Looks like it can be achieved by more careful
|
// TODO: Allow optimizations for the case above too. Looks like it can be achieved by more careful
|
||||||
// topological sort (i.e. if we ensure that all read_value users are completed before assign is run)
|
// topological sort (i.e. if we ensure that all read_value users are completed before assign is run)
|
||||||
node.can_be_optimized(node.get_users().size() == 1);
|
node.can_be_optimized(can_read_value_be_optimize(node));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -445,10 +445,17 @@ event::ptr primitive_inst::realloc_if_needed() {
|
|||||||
if (_node->is_type<input_layout>())
|
if (_node->is_type<input_layout>())
|
||||||
return ev;
|
return ev;
|
||||||
|
|
||||||
|
// read_value/assign nodes are supposed to always use variable memory
|
||||||
if (auto stateful_prim = dynamic_cast<memory_state::variable*>(this)) {
|
if (auto stateful_prim = dynamic_cast<memory_state::variable*>(this)) {
|
||||||
std::string variable_id = stateful_prim->variable_id();
|
std::string variable_id = stateful_prim->variable_id();
|
||||||
auto variable = get_network().get_variable(variable_id);
|
auto variable = get_network().get_variable(variable_id);
|
||||||
variable.set_layout(actual_layout);
|
variable.set_layout(actual_layout);
|
||||||
|
GPU_DEBUG_TRACE_DETAIL << id() << ": use variable memory " << variable.get_memory()
|
||||||
|
<< " (size=" << variable.get_memory()->size() << ")" << std::endl;
|
||||||
|
// For nodes that can be optimized, variable memory is used as output memory
|
||||||
|
// so there is no need for output memory reallocation
|
||||||
|
if (can_be_optimized())
|
||||||
|
return ev;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update output layout with respect to FC's fake alignment
|
// Update output layout with respect to FC's fake alignment
|
||||||
|
Loading…
Reference in New Issue
Block a user