[GPU] Host time optimizations for in order queue (#11255)
* [GPU] Host time optimizations
* Fix failed fusings_gpu/permute_eltwise_loop.basic/* tests
This commit is contained in:
parent
f13b6252e9
commit
cd703580b6
@ -102,7 +102,10 @@ public:
|
||||
}
|
||||
|
||||
network_output get_output(const primitive_id& output_id) {
|
||||
return network_output(get_primitive_event(output_id), get_output_memory(output_id), get_stream_ptr());
|
||||
event::ptr evt;
|
||||
if (get_stream().get_queue_type() == queue_types::out_of_order)
|
||||
evt = get_primitive_event(output_id);
|
||||
return network_output(evt, get_output_memory(output_id), get_stream_ptr());
|
||||
}
|
||||
|
||||
memory::ptr get_output_memory(const primitive_id& output_id);
|
||||
@ -133,8 +136,12 @@ public:
|
||||
}
|
||||
std::map<primitive_id, event::ptr> result;
|
||||
for (auto& id : primitive_ids) {
|
||||
if (std::find(optimized_primitives.begin(), optimized_primitives.end(), id) == optimized_primitives.end())
|
||||
if (std::find(optimized_primitives.begin(), optimized_primitives.end(), id) == optimized_primitives.end()) {
|
||||
if (has_event(id))
|
||||
result.emplace(id, get_primitive_event(id));
|
||||
else
|
||||
result.emplace(id, nullptr);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -118,7 +118,9 @@ struct loop_impl : typed_primitive_impl<loop> {
|
||||
|
||||
loop_carried_dep.clear();
|
||||
for (const auto& backedge : node.get_back_edges()) {
|
||||
event::ptr body_event = body_network->get_primitive_event(backedge.from);
|
||||
event::ptr body_event;
|
||||
if (body_network->has_event(backedge.from))
|
||||
body_event = body_network->get_primitive_event(backedge.from);
|
||||
loop_carried_dep.emplace_back(body_event);
|
||||
}
|
||||
|
||||
|
@ -209,14 +209,6 @@ protected:
|
||||
if (profiling) {
|
||||
stream.finish();
|
||||
event->set();
|
||||
} else {
|
||||
// Create and set user event as complete
|
||||
event = stream.create_user_event(true);
|
||||
}
|
||||
|
||||
if (!event) {
|
||||
std::string error_msg = "Event was not created properly for " + instance.id();
|
||||
throw std::runtime_error(error_msg);
|
||||
}
|
||||
|
||||
return event;
|
||||
|
@ -700,6 +700,10 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||
}
|
||||
}
|
||||
|
||||
// Store events only in case of OOO queue or enabled Profiling
|
||||
auto store_events = get_stream().get_queue_type() == queue_types::out_of_order ||
|
||||
get_engine().configuration().enable_profiling;
|
||||
if (store_events) {
|
||||
for (auto& inst : _program->get_processing_order()) {
|
||||
// Special handling for mutable data. The event should be the same as the user or dependency with highest
|
||||
// processing_num, as the mutable_data can be updated when it is both a user and a dependency.
|
||||
@ -729,6 +733,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||
// them valid events manually
|
||||
_events[dout->id()] = get_stream().create_user_event(true);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& prim : _primitives) {
|
||||
prim.second->reset_output_change();
|
||||
@ -829,17 +834,15 @@ std::vector<std::shared_ptr<primitive_inst>> network::get_primitives(const std::
|
||||
|
||||
void network::execute_primitive(const std::shared_ptr<primitive_inst>& primitive,
|
||||
const std::vector<event::ptr>& events) {
|
||||
auto id = primitive->id();
|
||||
auto it = _events.find(id);
|
||||
bool found = (it != _events.end());
|
||||
CLDNN_ERROR_BOOL(id,
|
||||
"Invalid primitive call ",
|
||||
found,
|
||||
"Primitive " + id + " is tried to be executed for the second time");
|
||||
|
||||
event::ptr ev = primitive->execute(events);
|
||||
|
||||
// Collect events only for OOO queue and Profiling mode
|
||||
if (get_stream().get_queue_type() == queue_types::out_of_order ||
|
||||
get_engine().configuration().enable_profiling) {
|
||||
auto id = primitive->id();
|
||||
_events.insert({id, ev});
|
||||
}
|
||||
}
|
||||
|
||||
void network::allocate_primitive_instance(program_node const& node) {
|
||||
if (_primitives.count(node.id()))
|
||||
|
@ -148,11 +148,13 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
return _impl->execute(events, *this);
|
||||
|
||||
std::vector<event::ptr> dependencies;
|
||||
auto queue_type = get_network().get_stream().get_queue_type();
|
||||
if (queue_type == queue_types::out_of_order) {
|
||||
dependencies.reserve(_exec_deps.size());
|
||||
for (auto& input : _exec_deps) {
|
||||
auto id = input->id();
|
||||
try {
|
||||
// if the requested event does not exits it means that it has not been executed, so the processing_order is
|
||||
// if the requested event does not exists it means that it has not been executed, so the processing_order is
|
||||
// wrong or synchronization failed.
|
||||
auto ev = get_network().get_primitive_event(id);
|
||||
dependencies.emplace_back(ev);
|
||||
@ -162,6 +164,7 @@ event::ptr primitive_inst::execute(const std::vector<event::ptr>& events) {
|
||||
CLDNN_ERROR_MESSAGE(id, temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
return _impl->execute(dependencies, *this);
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
};
|
||||
|
||||
class permute_eltwise_loop: public LoopFusingTest {};
|
||||
TEST_P(permute_eltwise_loop, basic_taylor) {
|
||||
TEST_P(permute_eltwise_loop, basic) {
|
||||
auto p = GetParam();
|
||||
auto num_iteration_mem = engine.allocate_memory({data_types::i64, format::bfyx, {1, 1, 1, 1}});
|
||||
auto trip_count_mem = engine.allocate_memory({data_types::i64, format::bfyx, {1, 1, 1, 1}});
|
||||
|
Loading…
Reference in New Issue
Block a user