diff --git a/inference-engine/thirdparty/clDNN/api/tensor.hpp b/inference-engine/thirdparty/clDNN/api/tensor.hpp
index 09bb93c4d4b..b8ce09b7de9 100644
--- a/inference-engine/thirdparty/clDNN/api/tensor.hpp
+++ b/inference-engine/thirdparty/clDNN/api/tensor.hpp
@@ -390,6 +390,12 @@ struct format {
         }
         return false;
     }
+    /// @brief Checks if @p format is simple data format
+    static bool is_simple_data_format(type fmt) {
+        return (fmt == yxfb || fmt == byxf ||
+                fmt == bfyx || fmt == fyxb ||
+                fmt == bfzyx || fmt == bfwzyx);
+    }
     /// @brief Checks if @p format is of grouped type
     static bool is_grouped(type fmt) { return group_num(fmt) != 0; }
     /// @brief Checks if @p format is of image type
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_timeloop_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_timeloop_ref.cl
index 5703366176f..444b4e87e02 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_timeloop_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_dynamic_timeloop_ref.cl
@@ -61,7 +61,7 @@ KERNEL(lstm_dynamic_timeloop_ref)(
 #else
     bool use_cell = false;
 #endif //cell_term
-
+
     for(int timestep = 0; timestep < MAX_SEQUENCE_LENGTH; timestep++)
     {
         //not all workitems will do computations
@@ -155,6 +155,6 @@ KERNEL(lstm_dynamic_timeloop_ref)(
         } //second if(timestep < unroll_timesteps)
         //all workitems needs to hit the barrier after writing to global output memory
-        barrier(CLK_GLOBAL_MEM_FENCE);
+        barrier(CLK_GLOBAL_MEM_FENCE);
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
index ae7f12eac16..347ab507d4a 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
@@ -31,7 +31,7 @@ gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine,
                        bool reset)
     : lockable_gpu_mem(engine), memory_impl(engine, layout, net_id, allocation_type::cl_mem, false),
       _buffer(_context->context(), CL_MEM_READ_WRITE, size()) {
-    if (reset && is_memory_reset_needed(_layout)) zero_buffer();
+    if (reset || is_memory_reset_needed(_layout)) zero_buffer();
 }
 
 gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine,
@@ -256,7 +256,7 @@ gpu_usm::gpu_usm(const refcounted_obj_ptr<engine_impl>& engine, const layout& la
                        "Unknown unified shared memory type!");
     }
 
-    if (reset && is_memory_reset_needed(_layout)) zero_buffer();
+    if (reset || is_memory_reset_needed(_layout)) zero_buffer();
 }
 
 void* gpu_usm::lock() {
diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h
index e346f545019..0219319994b 100644
--- a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h
+++ b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h
@@ -48,7 +48,7 @@ struct memory_impl : refcounted_obj<memory_impl> {
         // - To be Weights format (Data memory can be reused by memory_pool, which can lead to errors)
         // - To have zero paddings
         // - To be completely filled with data
-        if (!format::is_weights_format(l.format) || format::is_winograd(l.format) || format::is_image_2d(l.format)) {
+        if ((!format::is_weights_format(l.format) && !format::is_simple_data_format(l.format)) || format::is_winograd(l.format) || format::is_image_2d(l.format)) {
             return true;
         }
 
diff --git a/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp b/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp
index 30ec5be2a5e..611baac3308 100644
--- a/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp
+++ b/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp
@@ -188,7 +188,11 @@ memory_impl::ptr primitive_inst::allocate_output() {
                                      false);
     } else if (_network.is_internal() && _node.is_output() && _node.is_type<generic_layer>() &&
                engine.supports_allocation(allocation_type::usm_device)) {
-        return engine.allocate_memory(layout, allocation_type::usm_device, net_id);
+        return engine.allocate_memory(layout, allocation_type::usm_device, net_id, false);
+    } else if (_network.is_internal() && !_node.is_output() && _node.is_type<input_layout>()) {
+        // Skip memory reset for input_layout primitives, since data will be copied from cldnn::data primitive
+        // or just reuse primitive's memory
+        return engine.allocate_memory(layout, alloc_type, net_id, false);
     } else if (_network.is_internal() || (!_node.can_share_buffer()) || _node.can_be_optimized() || _node.is_output()) {
         return engine.allocate_memory(layout, alloc_type, net_id);
     }
diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp
index 7718e655ef7..272c81c45ad 100644
--- a/inference-engine/thirdparty/clDNN/src/program.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program.cpp
@@ -532,7 +532,8 @@ void program_impl::transfer_memory_to_device() {
             // Allocate and transfer memory
             auto device_mem = mem.get_engine()->allocate_memory(mem.get_layout(),
                                                                 allocation_type::usm_device,
-                                                                mem.get_net_id());
+                                                                mem.get_net_id(),
+                                                                false);
             dynamic_cast<gpu::gpu_usm&>(*device_mem).copy_from_other(dynamic_cast<gpu::gpu_usm&>(mem));
             data_node.attach_memory(*device_mem);
             const_cast<memory&>(data_node.get_primitive()->mem).reset();
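
Note on the overall behavior (reviewer commentary, not part of the patch): the constructors in memory_gpu.cpp change `reset && is_memory_reset_needed(_layout)` to `reset || is_memory_reset_needed(_layout)`, so an explicit reset request always zeroes the buffer, and a layout that is unsafe to leave uninitialized (padded, partially filled, or neither a weights format nor a simple data format) is zeroed even when the caller did not ask. The new `false` arguments at the allocation sites in primitive_inst.cpp and program.cpp opt out of the reset exactly where the buffer is fully overwritten right afterwards. A minimal standalone sketch of that policy follows; it is not clDNN code, `fmt`, `layout`, and `allocate` are hypothetical stand-ins, and the is_image_2d case is elided for brevity.

// Standalone sketch of the zero-initialization policy in this patch (hypothetical types).
#include <cstdio>

enum class fmt { bfyx, goiyx_weights, winograd_2x3_s1_data };  // stand-in for cldnn::format

bool is_simple_data_format(fmt f) { return f == fmt::bfyx; }
bool is_weights_format(fmt f) { return f == fmt::goiyx_weights; }
bool is_winograd(fmt f) { return f == fmt::winograd_2x3_s1_data; }

struct layout {
    fmt format;
    bool has_padding;    // non-zero paddings force a reset
    bool fully_filled;   // data covers the whole allocation
};

// Models the patched memory_impl::is_memory_reset_needed: the reset may be
// skipped only for weights formats or simple data formats, and only with
// zero padding and a completely filled buffer.
bool is_memory_reset_needed(const layout& l) {
    if ((!is_weights_format(l.format) && !is_simple_data_format(l.format)) || is_winograd(l.format))
        return true;
    return l.has_padding || !l.fully_filled;
}

// Models the patched gpu_buffer/gpu_usm constructors: '||' instead of '&&'.
void allocate(const layout& l, bool reset) {
    if (reset || is_memory_reset_needed(l))
        std::puts("zero_buffer()");
    else
        std::puts("skip zeroing");
}

int main() {
    allocate({fmt::bfyx, false, true}, false);  // simple format, no padding -> skip
    allocate({fmt::bfyx, true, true}, false);   // padded -> zeroed anyway
    allocate({fmt::bfyx, false, true}, true);   // explicit request -> zeroed
}

Seen this way, the '&&' to '||' flip is the correctness half of the change (a requested reset can no longer be silently dropped), while is_simple_data_format and the explicit false flags are the performance half (no redundant zeroing of buffers that are written in full before first use).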