diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
index 9efe98c1b5c..c8ee0ae8514 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp
@@ -329,31 +329,39 @@ void prepare_primitive_fusing::fuse_bias(program &p) {
         new_node.recalc_output_layout();
     };
 
-    auto recalculate_biases = [&](data_node& original_node, data_node& new_node) -> bool {
+    auto recalculate_biases = [&](data_node& original_node, data_node& second_node) -> bool {
         auto original_mem = original_node.get_attached_memory_ptr();
-        auto new_mem = new_node.get_attached_memory_ptr();
-        if (original_mem->count() != new_mem->count() || original_mem->get_layout().data_type != new_mem->get_layout().data_type)
+        auto second_mem = second_node.get_attached_memory_ptr();
+        if (original_mem->count() != second_mem->count() || original_mem->get_layout().data_type != second_mem->get_layout().data_type)
             return false;
 
         switch (original_mem->get_layout().data_type) {
            case data_types::f32: {
-                mem_lock<float> original_bias_mem(original_mem, p.get_stream());
-                mem_lock<float> new_bias_mem(new_mem, p.get_stream());
+                cldnn::memory_ptr new_mem = p.get_engine().allocate_memory(original_mem->get_layout());
+                mem_lock<float> new_bias_mem(new_mem, p.get_stream());
+                mem_lock<float> original_bias_mem(original_mem, p.get_stream());
+                mem_lock<float> second_bias_mem(second_mem, p.get_stream());
                float* original_data = original_bias_mem.data();
-                float* new_data = new_bias_mem.data();
+                float* new_data = second_bias_mem.data();
                for (size_t i = 0; i < original_bias_mem.size(); i++)
-                    original_data[i] += new_data[i];
+                    new_bias_mem[i] = original_data[i] + new_data[i];
+
+                original_node.attach_memory(new_mem);
                break;
            }
            case data_types::f16: {
-                mem_lock<uint16_t> original_bias_mem(original_mem, p.get_stream());
-                mem_lock<uint16_t> new_bias_mem(new_mem, p.get_stream());
+                cldnn::memory_ptr new_mem = p.get_engine().allocate_memory(original_mem->get_layout());
+                mem_lock<uint16_t> new_bias_mem(new_mem, p.get_stream());
+                mem_lock<uint16_t> original_bias_mem(original_mem, p.get_stream());
+                mem_lock<uint16_t> second_bias_mem(second_mem, p.get_stream());
                uint16_t* original_data = original_bias_mem.data();
-                uint16_t* new_data = new_bias_mem.data();
+                uint16_t* new_data = second_bias_mem.data();
                for (size_t i = 0; i < original_bias_mem.size(); i++) {
                    float new_val = half_to_float(original_data[i]) + half_to_float(new_data[i]);
-                    original_data[i] = float_to_half(new_val);
+                    new_bias_mem[i] = float_to_half(new_val);
                }
+
+                original_node.attach_memory(new_mem);
                break;
            }
            default:
diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp
index de28fcde035..db3457c09d7 100644
--- a/src/plugins/intel_gpu/src/graph/network.cpp
+++ b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -1197,6 +1197,9 @@ void network::transfer_memory_to_device(std::shared_ptr<primitive_inst> instance
     if (!get_engine().supports_allocation(allocation_type::usm_device))
         return;
 
+    if (get_engine().get_device_info().dev_type != device_type::discrete_gpu)
+        return;
+
     if (alloc_type == allocation_type::usm_host || alloc_type == allocation_type::usm_shared) {
         // Allocate and transfer memory
         auto device_mem = inst_mem.get_engine()->allocate_memory(inst_mem.get_layout(), allocation_type::usm_device, false);
diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
index 7e7b921636a..04c1452eb27 100644
--- a/src/plugins/intel_gpu/src/graph/program.cpp
+++ b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -563,7 +563,8 @@ void program::build_program(bool is_internal) {
 
     if (!is_internal) {
         prim_info = get_current_stage_info();
-        transfer_memory_to_device();
+        if (get_engine().get_device_info().dev_type == device_type::discrete_gpu)
+            transfer_memory_to_device();
     }
 
     cleanup();
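
Note on the first hunk: the rewritten recalculate_biases no longer accumulates the second bias in place on the original constant's memory; it writes the sums into a freshly allocated buffer and attaches that buffer to the node. Below is a minimal standalone sketch of that out-of-place update pattern, using std::vector<float> as a hypothetical stand-in for cldnn::memory_ptr/mem_lock; it illustrates the approach only and is not the plugin's actual API.

#include <cstddef>
#include <vector>

// Sketch only: sums two bias buffers into a newly allocated buffer instead of
// mutating the original one, mirroring the out-of-place update in the diff above.
// std::vector<float> is a hypothetical stand-in for the cldnn memory objects.
std::vector<float> recalculate_biases_sketch(const std::vector<float>& original_bias,
                                             const std::vector<float>& second_bias) {
    std::vector<float> new_bias(original_bias.size());
    for (size_t i = 0; i < original_bias.size(); i++)
        new_bias[i] = original_bias[i] + second_bias[i];
    return new_bias;  // the caller would attach this as the node's new bias memory
}

The second and third hunks apply the same guard in two places: host-side memory is only migrated to device memory when the target device is a discrete GPU.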