[GPU] Remove clFinish call from USM memory lock function (#17830)
This commit is contained in:
parent
43d67b0a32
commit
5afbd4cf92
@ -14,6 +14,8 @@
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
#include "openvino/core/except.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
struct primitive;
|
||||
@ -137,7 +139,8 @@ inline derived_type* downcast(base_type* base) {
|
||||
if (auto casted = dynamic_cast<derived_type*>(base))
|
||||
return casted;
|
||||
|
||||
throw std::runtime_error("Unable to cast pointer from base to derived type");
|
||||
OPENVINO_THROW("Unable to cast pointer from base (", typeid(base_type).name(), ") ",
|
||||
"type to derived (", typeid(derived_type).name(), ") type");
|
||||
}
|
||||
|
||||
template <typename derived_type, typename base_type, typename std::enable_if<std::is_base_of<base_type, derived_type>::value, int>::type = 0>
|
||||
|
@ -55,7 +55,7 @@ struct loop_impl : typed_primitive_impl<loop> {
|
||||
instance.preprocess_input_memory();
|
||||
instance.preprocess_backedge_memory();
|
||||
|
||||
// set input data for current_iteration primitive if current_it`eration is used
|
||||
// set input data for current_iteration primitive if current_iteration is used
|
||||
if (!primitive->current_iteration_id.empty()) {
|
||||
auto current_iteration_prim = body_network->get_primitive(primitive->current_iteration_id);
|
||||
auto input_layout_prim = std::dynamic_pointer_cast<input_layout_inst>(current_iteration_prim);
|
||||
@ -91,6 +91,14 @@ struct loop_impl : typed_primitive_impl<loop> {
|
||||
const auto& concatenated_input_mem_mappings = instance.concatenated_input_mem_mappings;
|
||||
const auto& concatenated_output_mem_mappings = instance.concatenated_output_mem_mappings;
|
||||
|
||||
// If there are concatenated_input_mem_mappings or backedge_memory_mappings we need to wait for
|
||||
// previous tasks before accessing memory in get_sliced_mem() and setup_iteration() functions
|
||||
if (!concatenated_input_mem_mappings.empty() || !instance.backedge_memory_mappings.empty()) {
|
||||
for (auto e : events) {
|
||||
e->wait();
|
||||
}
|
||||
}
|
||||
|
||||
// Set sliced input data
|
||||
for (size_t i = 0; i < concatenated_input_mem_mappings.size(); ++i) {
|
||||
const auto& concatenated_input = concatenated_input_mem_mappings.at(i);
|
||||
|
@ -366,7 +366,6 @@ void* gpu_usm::lock(const stream& stream, mem_lock_type type) {
|
||||
std::lock_guard<std::mutex> locker(_mutex);
|
||||
if (0 == _lock_count) {
|
||||
auto& cl_stream = downcast<const ocl_stream>(stream);
|
||||
cl_stream.finish(); // Synchronization needed for OOOQ.
|
||||
if (get_allocation_type() == allocation_type::usm_device) {
|
||||
if (type != mem_lock_type::read) {
|
||||
throw std::runtime_error("Unable to lock allocation_type::usm_device with write lock_type.");
|
||||
|
@ -84,8 +84,8 @@ public:
|
||||
ASSERT_EQ(outputs_ref.size(), outputs_fused.size());
|
||||
ASSERT_EQ(outputs_ref.size(), size_t(1));
|
||||
|
||||
auto val_ref=get_output_values_to_float(not_fused, outputs_ref.begin()->first);
|
||||
auto val_opt=get_output_values_to_float(fused, outputs_fused.begin()->first);
|
||||
auto val_ref = get_output_values_to_float(not_fused, outputs_ref.begin()->second);
|
||||
auto val_opt = get_output_values_to_float(fused, outputs_fused.begin()->second);
|
||||
ASSERT_EQ(val_ref.size(), val_opt.size());
|
||||
for (size_t i = 0; i < val_ref.size(); i++) {
|
||||
ASSERT_NEAR(val_ref[i], val_opt[i], tolerance)
|
||||
|
@ -9292,12 +9292,13 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) {
|
||||
network network(engine, topology, config);
|
||||
|
||||
network.set_input_data("input", input_mem);
|
||||
network.execute();
|
||||
auto outputs = network.execute();
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
|
||||
for (auto& p : network.get_primitives_info())
|
||||
std::cerr << p.original_id << " " << p.kernel_id << std::endl;
|
||||
|
||||
auto out_ptr = get_output_values_to_float<FLOAT16>(network, "conv_fsv");
|
||||
auto out_ptr = get_output_values_to_float<FLOAT16>(network, outputs.begin()->second);
|
||||
auto out_lay = network.get_node_output_layout("conv_fsv");
|
||||
ASSERT_EQ(out_lay.batch(), expected_result.size());
|
||||
ASSERT_EQ(out_lay.feature(), expected_result[0].size());
|
||||
|
@ -1406,8 +1406,8 @@ public:
|
||||
ASSERT_EQ(outputs_ocl.size(), outputs_onednn.size());
|
||||
ASSERT_EQ(outputs_ocl.size(), size_t(1));
|
||||
|
||||
auto val_ocl = get_output_values_to_float(network_ocl, outputs_ocl.begin()->first);
|
||||
auto val_onednn = get_output_values_to_float(network_onednn, outputs_onednn.begin()->first);
|
||||
auto val_ocl = get_output_values_to_float(network_ocl, outputs_ocl.begin()->second);
|
||||
auto val_onednn = get_output_values_to_float(network_onednn, outputs_onednn.begin()->second);
|
||||
|
||||
ASSERT_EQ(val_ocl.size(), val_onednn.size());
|
||||
|
||||
|
@ -579,13 +579,9 @@ T div_up(const T a, const U b) {
|
||||
}
|
||||
|
||||
template <class T>
|
||||
std::vector<float> get_output_values_to_float(cldnn::network& net, const cldnn::primitive_id& output_id, size_t max_cnt = std::numeric_limits<size_t>::max()) {
|
||||
std::vector<float> get_output_values_to_float(cldnn::network& net, const cldnn::network_output& output, size_t max_cnt = std::numeric_limits<size_t>::max()) {
|
||||
std::vector<float> ret;
|
||||
auto ptr = net.get_output_memory(output_id);
|
||||
auto out_ids = net.get_output_ids();
|
||||
if (find(out_ids.begin(), out_ids.end(), output_id) == out_ids.end())
|
||||
IE_THROW() << "Non output node's memory may have been reused. "
|
||||
"Make target node to output by using ov::intel_gpu::custom_outputs in ExecutionConfig.";
|
||||
auto ptr = output.get_memory();
|
||||
cldnn::mem_lock<T, cldnn::mem_lock_type::read> mem(ptr, net.get_stream());
|
||||
if (ptr->get_layout().data_type != cldnn::type_to_data_type<T>::value)
|
||||
IE_THROW() << "target type " << cldnn::data_type_traits::name(cldnn::type_to_data_type<T>::value)
|
||||
@ -595,20 +591,20 @@ std::vector<float> get_output_values_to_float(cldnn::network& net, const cldnn::
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline std::vector<float> get_output_values_to_float(cldnn::network& net, const cldnn::primitive_id& output_id, size_t max_cnt = std::numeric_limits<size_t>::max()) {
|
||||
switch(net.get_output_layout(output_id).data_type){
|
||||
inline std::vector<float> get_output_values_to_float(cldnn::network& net, const cldnn::network_output& output, size_t max_cnt = std::numeric_limits<size_t>::max()) {
|
||||
switch(output.get_layout().data_type){
|
||||
case cldnn::data_types::f16:
|
||||
return get_output_values_to_float<FLOAT16>(net, output_id, max_cnt);
|
||||
return get_output_values_to_float<FLOAT16>(net, output, max_cnt);
|
||||
case cldnn::data_types::f32:
|
||||
return get_output_values_to_float<float>(net, output_id, max_cnt);
|
||||
return get_output_values_to_float<float>(net, output, max_cnt);
|
||||
case cldnn::data_types::i8:
|
||||
return get_output_values_to_float<int8_t>(net, output_id, max_cnt);
|
||||
return get_output_values_to_float<int8_t>(net, output, max_cnt);
|
||||
case cldnn::data_types::u8:
|
||||
return get_output_values_to_float<uint8_t>(net, output_id, max_cnt);
|
||||
return get_output_values_to_float<uint8_t>(net, output, max_cnt);
|
||||
case cldnn::data_types::i32:
|
||||
return get_output_values_to_float<int32_t>(net, output_id, max_cnt);
|
||||
return get_output_values_to_float<int32_t>(net, output, max_cnt);
|
||||
case cldnn::data_types::i64:
|
||||
return get_output_values_to_float<int64_t>(net, output_id, max_cnt);
|
||||
return get_output_values_to_float<int64_t>(net, output, max_cnt);
|
||||
default:
|
||||
IE_THROW() << "Unknown output data_type";
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user