diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp
index 6e3a0065bbb..a0e58b9b767 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/concatenation.hpp
@@ -39,16 +39,6 @@ namespace cldnn {
 /// @li outputIdx : index of destination feature
 struct concatenation : public primitive_base<concatenation> {
     CLDNN_DECLARE_PRIMITIVE(concatenation)
-
-    enum concatenation_axis {
-        along_b,
-        along_f,
-        along_x,
-        along_y,
-        along_z,
-        along_w
-    };
-
     /// @li Constructs concatenation primitive.
     /// @param id This primitive id.
     /// @param input Vector of input primitives ids.
@@ -56,7 +46,7 @@ struct concatenation : public primitive_base<concatenation> {
     concatenation(
         const primitive_id& id,
         const std::vector<primitive_id>& input,
-        const concatenation_axis axis,
+        const int64_t axis,
         const primitive_id& ext_prim_id = "",
         const padding& output_padding = padding())
         : primitive_base(id, {input}, ext_prim_id, output_padding), axis(axis) {}
@@ -69,14 +59,14 @@ struct concatenation : public primitive_base<concatenation> {
     concatenation(
         const primitive_id& id,
         const std::vector<primitive_id>& input,
-        const concatenation_axis axis,
+        const int64_t axis,
         const data_types output_dt,
         const primitive_id& ext_prim_id = "",
         const padding& output_padding = padding())
         : primitive_base(id, {input}, ext_prim_id, output_padding, optional_data_type{output_dt}), axis(axis) {}
     /// @brief Dimension along which concatenation should take place
-    concatenation_axis axis;
+    int64_t axis;
 };
 /// @}
 /// @}
diff --git a/src/plugins/intel_gpu/src/graph/concatenation.cpp b/src/plugins/intel_gpu/src/graph/concatenation.cpp
index 766c62285ef..c7368daf50b 100644
--- a/src/plugins/intel_gpu/src/graph/concatenation.cpp
+++ b/src/plugins/intel_gpu/src/graph/concatenation.cpp
@@ -22,7 +22,7 @@ layout concatenation_inst::calc_output_layout(concatenation_node const& node) {
     auto input_layout = node.input(0).get_output_layout();
     auto output_format = input_layout.format;
-    auto result_sizes = input_layout.size.sizes();
+    auto result_sizes = input_layout.get_dims();
     auto output_dt = desc->output_data_type ? *desc->output_data_type : input_layout.data_type;
@@ -31,14 +31,16 @@ layout concatenation_inst::calc_output_layout(concatenation_node const& node) {
     // calculate sum of features from all inputs
     result_sizes[axis_index] = 0;
     for (size_t i = 0; i < desc->input.size(); ++i) {
-        auto input_sizes = node.input(i).get_output_layout().size.sizes();
+        auto input_sizes = node.input(i).get_output_layout().get_dims();
         if (node.input(i).get_output_layout().format == format::b_fs_yx_fsv16)
             output_format = format::b_fs_yx_fsv16;
         result_sizes[axis_index] += input_sizes[axis_index];
     }
-    return layout {output_dt, output_format, (tensor) result_sizes};
+    auto def_fmt = format::get_default_format(input_layout.get_rank());
+
+    return layout {output_dt, output_format, tensor(def_fmt, result_sizes)};
 }
 std::string concatenation_inst::to_string(concatenation_node const& node) {
@@ -58,7 +60,6 @@ std::string concatenation_inst::to_string(concatenation_node const& node) {
     concat_info.add("concat axis", desc->axis);
     concat_info.add("inputs count", node.inputs_count());
     concat_info.add("inputs", ss_inputs.str());
-    concat_info.dump(primitive_description);
     node_info->add("concat info", concat_info);
     node_info->dump(primitive_description);
@@ -72,39 +73,39 @@ concatenation_inst::typed_primitive_inst(network& network, concatenation_node co
     auto output_layout = node.get_output_layout();
     tensor::value_type concat_count = 0;
-    auto input_size = input_layout.size;
-    auto output_size = output_layout.size;
+    auto input_size = input_layout.get_dims();
+    auto output_size = output_layout.get_dims();
     for (const auto& i : node.get_dependencies()) {
         auto input_i_layout = i->get_output_layout();
-        auto input_mem_size = input_i_layout.size;
-        for (int dim = concatenation::along_b; dim <= concatenation::along_w; ++dim) {
+        auto input_mem_size = input_i_layout.get_dims();
+        for (int64_t dim = 0; dim < output_layout.get_rank(); ++dim) {
            if (dim == node.get_primitive()->axis) {
-                concat_count += input_mem_size.raw[dim];
+                concat_count += input_mem_size[dim];
            } else {
                CLDNN_ERROR_NOT_EQUAL(node.id(),
                                      "Input size dim: " + std::to_string(dim),
-                                      input_size.raw[dim],
+                                      input_size[dim],
                                      "input memory dim: " + std::to_string(dim),
-                                      input_mem_size.raw[dim],
+                                      input_mem_size[dim],
                                      "Every input must have the same size");
            }
        }
    }
-    for (int dim = concatenation::along_b; dim <= concatenation::along_w; ++dim) {
+    for (int64_t dim = 0; dim < output_layout.get_rank(); ++dim) {
        if (dim == node.get_primitive()->axis) {
            CLDNN_ERROR_NOT_EQUAL(node.id(),
                                  "Concat count",
                                  concat_count,
                                  "output size dim:" + std::to_string(dim),
-                                  output_size.raw[dim],
+                                  output_size[dim],
                                  "Output size in concatenated dimension mismatch sum of inputs!");
        } else {
            CLDNN_ERROR_NOT_EQUAL(node.id(),
                                  "Input size dim: " + std::to_string(dim),
-                                  input_size.raw[dim],
+                                  input_size[dim],
                                  "output size dim:" + std::to_string(dim),
-                                  output_size.raw[dim],
+                                  output_size[dim],
                                  "Output size in non-concatenated dimension mistmatch input");
        }
    }
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
index b88ffc92fe6..215b115a711 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/add_required_reorders.cpp
@@ -9,7 +9,6 @@
 #include "pass_manager.h"
 #include "program_node.h"
 #include "mutable_data_inst.h"
-#include "concatenation_inst.h"
 #include "tensor_type.h"
 #include
 #include
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp
index c47167e5dbd..832df989537 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/concat_input_order.cpp
@@ -126,7 +126,7 @@ void concat_input_order::run(program& p) {
         auto& concat_node = node->as<concatenation>();
         auto prim = concat_node.get_primitive();
-        bool along_f = prim->axis == concatenation::along_f;
+        bool along_f = prim->axis == 1;
         size_t inputs_count = prim->input_size();
         bool no_fusing = !concat_node.has_fused_primitives() && concat_node.get_dependencies().size() == inputs_count;
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp
index 13e50ed512b..5a7b314c9db 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/graph_initializations.cpp
@@ -298,8 +298,7 @@ void graph_initializations::handle_lstm_node(program& p, lstm_node& node) {
         output_ids_offsets.push_back(e.second.first);
     }
     primitive_id concatenation_id = node.id() + ":concat";
-    auto concatenation_primitive =
-        std::make_shared<concatenation>(concatenation_id, output_ids_offsets, concatenation::along_f);
+    auto concatenation_primitive = std::make_shared<concatenation>(concatenation_id, output_ids_offsets, 1);
     auto& concatenation_node = p.get_or_create(concatenation_primitive);
     for (auto& e : output_map) {
         p.add_connection(*e.second.second, concatenation_node);
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
index a2bd0420a39..e13e457af13 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp
@@ -125,6 +125,7 @@ bool concat_in_place_optimization::match(concatenation_node& node) {
     auto output_format = node.get_output_layout().format;
     auto output_datatype = node.get_output_layout().data_type;
     auto concat_axis = node.get_primitive()->axis;
+    auto def_fmt = format::get_default_format(node.get_output_layout().get_rank());
     size_t idx = 0;
     for (auto& input : node.get_dependencies()) {
@@ -145,22 +146,22 @@ bool concat_in_place_optimization::match(concatenation_node& node) {
         // It however would make normal optimizations possible in others, so this is a trade-off to be investigated.
         if (idx != node.get_dependencies().size() - 1) {
             if ((l.format == format::b_fs_yx_fsv16 || l.format == format::b_fs_zyx_fsv16) &&
-                (l.size.feature[0] % 16 != 0 || node.get_primitive()->axis != concatenation::along_f))
+                (l.size.feature[0] % 16 != 0 || node.get_primitive()->axis != 1))
                 return false;
             if ((l.format == format::b_fs_yx_fsv32 || l.format == format::b_fs_zyx_fsv32) &&
-                (l.size.feature[0] % 32 != 0 || node.get_primitive()->axis != concatenation::along_f))
+                (l.size.feature[0] % 32 != 0 || node.get_primitive()->axis != 1))
                 return false;
-            if (l.format == format::b_fs_yx_fsv4 && (l.size.feature[0] != 4 || node.get_primitive()->axis != concatenation::along_f))
+            if (l.format == format::b_fs_yx_fsv4 && (l.size.feature[0] != 4 || node.get_primitive()->axis != 1))
                 return false;
         }
         idx++;
     }
-    auto lower_padd_in_axis = node.get_output_layout().data_padding.lower_size().raw[concat_axis];
+    auto lower_padd_in_axis = node.get_output_layout().data_padding.lower_size().sizes(def_fmt)[concat_axis];
     lower_padd_in_axis = std::max(lower_padd_in_axis,
-                                  node.get_dependency(0).get_output_layout().data_padding.lower_size().raw[concat_axis]);
+                                  node.get_dependency(0).get_output_layout().data_padding.lower_size().sizes(def_fmt)[concat_axis]);
     // check if concatenation in place can be applied for inputs set
     idx = 0;
@@ -208,13 +209,13 @@ bool concat_in_place_optimization::match(concatenation_node& node) {
         // Check that there isn't already some padding between inputs in concat axis.
         // If node has already been optimized we skip this check - this is just cascade adjustment.
         if (!node.can_be_optimized()) {
-            if (idx != node.get_dependencies().size() && input_padd.upper_size().raw[concat_axis] != 0)
+            if (idx != node.get_dependencies().size() && input_padd.upper_size().sizes(def_fmt)[concat_axis] != 0)
                 return false;
-            if (idx != 0 && input_padd.lower_size().raw[concat_axis] != 0)
+            if (idx != 0 && input_padd.lower_size().sizes(def_fmt)[concat_axis] != 0)
                 return false;
         }
-        lower_padd_in_axis += input->get_output_layout().size.raw[concat_axis];
+        lower_padd_in_axis += input->get_output_layout().size.sizes(def_fmt)[concat_axis];
         idx += 1;
     }
@@ -222,31 +223,43 @@ bool concat_in_place_optimization::match(concatenation_node& node) {
 }
 void concat_in_place_optimization::optimize_cascade(concatenation_node& node, std::list<concatenation_node*>& need_reoptimization) {
+    auto out_layout = node.get_output_layout();
+    auto out_rank = out_layout.get_rank();
+    auto def_fmt = format::get_default_format(out_rank);
     auto concat_axis = node.get_primitive()->axis;
+    // We need to transform axis from bf[w][z]yx order to bfxy[z][w] due to tensor.sizes() usages here
+    // should be removed once pad representation is changed
+    auto concat_axis_legacy = concat_axis;
+    if (concat_axis_legacy >= 2) {
+        auto spatial_axis = concat_axis_legacy - 2;
+        // Default and minimum number of dimensions is 4
+        auto spatial_size = std::max(out_rank, 4) - 2;
+        concat_axis_legacy = spatial_size - spatial_axis - 1 + 2;
+    }
     // Select output padding by propagating all required input paddings.
-    auto padd = node.get_output_layout().data_padding;
+    auto padd = out_layout.data_padding;
     for (auto input : node.get_dependencies()) {
         auto inputPadding = input->get_output_layout().data_padding;
         padd = padding::max(padd, inputPadding);
     }
-    auto lower_padd = padd.lower_size();
-    auto upper_padd = padd.upper_size();
+    auto lower_padd = padd.lower_size().sizes();
+    auto upper_padd = padd.upper_size().sizes();
     // For cascade adjustment override padding in concat axis to output padding.
     // In other case match(...) already checked that only first/last input have lower/upper padding.
     if (node.can_be_optimized()) {
-        lower_padd.raw[concat_axis] = node.get_output_layout().data_padding.lower_size().raw[concat_axis];
-        upper_padd.raw[concat_axis] = node.get_output_layout().data_padding.upper_size().raw[concat_axis];
+        lower_padd[concat_axis_legacy] = out_layout.data_padding.lower_size().sizes()[concat_axis_legacy];
+        upper_padd[concat_axis_legacy] = out_layout.data_padding.upper_size().sizes()[concat_axis_legacy];
     }
-    node.set_output_padding(padding(lower_padd.sizes(), upper_padd.sizes()));
+    node.set_output_padding(padding(lower_padd, upper_padd));
-    upper_padd.raw[concat_axis] += node.get_output_layout().size.raw[concat_axis];
+    upper_padd[concat_axis_legacy] += out_layout.get_dims()[concat_axis];
     // apply concatenation in place optimization
     for (auto input : node.get_dependencies()) {
-        auto input_length = input->get_output_layout().size.raw[concat_axis];
+        auto input_length = input->get_output_layout().get_dims()[concat_axis];
         if (input->is_type<concatenation>() && input->can_be_optimized())
             need_reoptimization.push_back(&input->as<concatenation>());
@@ -255,16 +268,16 @@ void concat_in_place_optimization::optimize_cascade(concatenation_node& node, st
         //
         //   |--- lower padd ---|                    |---------- upper padd -----------|
         //   |-- output padd ---| ----- input1 ------|----- input2 -----|-- out padd --|
-        upper_padd.raw[concat_axis] -= input_length;
+        upper_padd[concat_axis_legacy] -= input_length;
         // set new padding for input
-        input->set_output_padding(padding(lower_padd.sizes(), upper_padd.sizes()));
+        input->set_output_padding(padding(lower_padd, upper_padd));
         // move lower padd further
         //
         //   |-------------- lower padd -------------|---------- upper padd -----------|
         //   |-- output padd ---| ----- input1 ------|----- input2 -----|-- out padd --|
-        lower_padd.raw[concat_axis] += input_length;
+        lower_padd[concat_axis_legacy] += input_length;
     }
     node.can_be_optimized(true);
diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp
index f5f86e1c2f9..e0e22e31762 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/concatenation.cpp
@@ -16,24 +16,33 @@ namespace cldnn {
 namespace ocl {
 namespace {
-kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
-    switch (axis) {
-        case concatenation::along_x:
-            return kernel_selector::concat_axis::X;
-        case concatenation::along_y:
-            return kernel_selector::concat_axis::Y;
-        case concatenation::along_z:
-            return kernel_selector::concat_axis::Z;
-        case concatenation::along_w:
-            return kernel_selector::concat_axis::W;
-        case concatenation::along_f:
-            return kernel_selector::concat_axis::FEATURE;
-        case concatenation::along_b:
-            return kernel_selector::concat_axis::BATCH;
-        default:
-            return kernel_selector::concat_axis::X;
+kernel_selector::concat_axis convert_axis(int64_t axis, size_t rank) {
+    unsigned cldnn_axis = axis >= 0 ? axis : axis + static_cast<int64_t>(rank);
+    if (cldnn_axis >= rank)
+        IE_THROW() << "Concatenation axis exceeds number of dimensions";
+
+    // Difference in dimension ordering between IE and GPU plugin,
+    // reverse spatial dimensions after batch and feature.
+    if (cldnn_axis >= 2) {
+        auto spatial_axis = cldnn_axis - 2;
+        // Default and minimum number of dimensions is 4
+        auto spatial_size = std::max(rank, 4) - 2;
+        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
     }
+
+    switch (cldnn_axis) {
+        case 0: return kernel_selector::concat_axis::BATCH;
+        case 1: return kernel_selector::concat_axis::FEATURE;
+        case 2: return kernel_selector::concat_axis::X;
+        case 3: return kernel_selector::concat_axis::Y;
+        case 4: return kernel_selector::concat_axis::Z;
+        case 5: return kernel_selector::concat_axis::W;
+        default: IE_THROW() << "Unsupported concatenation axis: " << axis;
+    }
+
+    return kernel_selector::concat_axis::FEATURE; // shouldn't get here
 }
+
 } // namespace
 struct concatenation_impl : typed_primitive_impl_ocl<concatenation> {
@@ -76,7 +85,7 @@ public:
             concat_params.inputs[i] = convert_data_tensor(input_layout);
         }
-        concat_params.axis = convert_axis(axis);
+        concat_params.axis = convert_axis(axis, arg.get_output_layout().get_rank());
         concat_optional_params.kernelPerInput = true;
         auto& kernel_selector = kernel_selector::concatenation_kernel_selector::Instance();
diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp
index 98198fd86e5..c1c4e6b915b 100644
--- a/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.cpp
@@ -54,18 +54,9 @@ protected:
             input_mds.push_back(onednn::layout_to_memory_desc(input->get_output_layout()));
         }
         auto output_md = onednn::layout_to_memory_desc(arg.get_output_layout());
-        int axis = 0;
-        switch (prim->axis) {
-            case concatenation::concatenation_axis::along_b: axis = 0; break;
-            case concatenation::concatenation_axis::along_f: axis = 1; break;
-            case concatenation::concatenation_axis::along_y: axis = 2; break;
-            case concatenation::concatenation_axis::along_x: axis = 3; break;
-            default: throw std::runtime_error("unsupported concat axis");
-        }
-
         return std::make_shared<dnnl::concat::primitive_desc>(
            output_md,
-            axis,
+            prim->axis,
            input_mds,
            engine.get_onednn_engine());
    }
diff --git a/src/plugins/intel_gpu/src/plugin/ops/concat.cpp b/src/plugins/intel_gpu/src/plugin/ops/concat.cpp
index 2c101f299b8..60220dcc20e 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/concat.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/concat.cpp
@@ -13,40 +13,17 @@ namespace ov {
 namespace runtime {
 namespace intel_gpu {
-static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) {
-    unsigned cldnn_axis = axis >= 0 ? axis : axis + static_cast<int32_t>(rank);
-    if (cldnn_axis >= rank)
-        IE_THROW() << "Concatenation axis exceeds number of dimensions";
-
-    // Difference in dimension ordering between IE and GPU plugin,
-    // reverse spatial dimensions after batch and feature.
-    if (cldnn_axis >= 2) {
-        auto spatial_axis = cldnn_axis - 2;
-        // Default and minimum number of dimensions is 4
-        auto spatial_size = std::max(rank, 4) - 2;
-        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
-    }
-
-    switch (cldnn_axis) {
-        case 0: return cldnn::concatenation::concatenation_axis::along_b;
-        case 1: return cldnn::concatenation::concatenation_axis::along_f;
-        case 2: return cldnn::concatenation::concatenation_axis::along_x;
-        case 3: return cldnn::concatenation::concatenation_axis::along_y;
-        case 4: return cldnn::concatenation::concatenation_axis::along_z;
-        case 5: return cldnn::concatenation::concatenation_axis::along_w;
-        default: IE_THROW() << "Unsupported concatenation axis: " << axis;
-    }
-
-    return cldnn::concatenation::concatenation_axis::along_f; // shouldn't get here
-}
-
 static void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Concat>& op) {
     auto inputPrimitives = p.GetInputPrimitiveIDs(op);
     std::string layerName = layer_type_name_ID(op);
+    int64_t axis = op->get_axis();
+    if (axis < 0)
+        axis = axis + static_cast<int64_t>(op->get_input_partial_shape(0).rank().get_length());
+
     auto concatPrim = cldnn::concatenation(
         layerName,
         inputPrimitives,
-        GetConcatAxis(op->get_axis(), op->get_input_shape(0).size()),
+        axis,
         DataTypeFromPrecision(op->get_output_element_type(0)),
         op->get_friendly_name());
diff --git a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp
index 4b743e706bd..30fd9678097 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp
@@ -51,7 +51,7 @@ static void CreateCommonConvertColorOp(Program& p, const std::shared_ptrget_friendly_name()));
         }
-        p.AddPrimitive(cldnn::concatenation(layerName, convert_color_names, cldnn::concatenation::along_b, op->get_friendly_name()));
+        p.AddPrimitive(cldnn::concatenation(layerName, convert_color_names, 0, op->get_friendly_name()));
     } else {
         p.AddPrimitive(cldnn::convert_color(layerName,
                                             inputPrimitives,
diff --git a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
index 901271f07d4..d5820fea9c0 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
@@ -293,7 +293,7 @@ static void CreateParameterOp(Program& p, const std::shared_ptr 1) {
                 auto concatPrimID = "concat:" + inputName + Program::m_preProcessTag;
-                p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, cldnn::concatenation::along_b, op->get_friendly_name()));
+                p.AddPrimitive(cldnn::concatenation(concatPrimID, reorders, 0, op->get_friendly_name()));
                 p.primitiveIDs[inputName] = concatPrimID;
             }
         } else {
diff --git a/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp b/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp
index 8dc8ffab82f..a8b877ef0e8 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/rnn.cpp
@@ -140,7 +140,7 @@ static void CreateLSTMCellOp(Program& p, const std::shared_ptrget_friendly_name()));
     p.AddPrimitive(cldnn::concatenation(input_concatID,
                                         { permuteID, hiddenInStr },
-                                        cldnn::concatenation::concatenation_axis::along_x,
+                                        3,
                                         op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(hiddenInResh, op->get_friendly_name(), op);
@@ -159,7 +159,7 @@ static void CreateLSTMCellOp(Program& p, const std::shared_ptrget_friendly_name()));
+    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, 1, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
     p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : "", op->get_friendly_name()));
@@ -273,7 +273,7 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptrget_friendly_name()));
+    p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, 2, op->get_friendly_name()));
     p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
     std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
@@ -300,7 +300,7 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptrget_friendly_name()));
         p.AddInnerPrimitiveToProfiler(inputCrop_id, op->get_friendly_name(), op);
-        p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x, op->get_friendly_name()));
+        p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, 3, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(concatID, op->get_friendly_name(), op);
         p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID, op->get_friendly_name()));
         p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
@@ -345,7 +345,7 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr
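
Note (not part of the patch): the same axis remapping appears twice above, in convert_axis() in the OCL implementation and as concat_axis_legacy in prepare_buffer_fusing.cpp. It takes an ngraph-order axis (batch, feature, then spatial dimensions from outermost to innermost) and maps it to the legacy cldnn order in which the spatial dimensions are reversed (bfxy[z][w]). The standalone sketch below illustrates that mapping; the helper name to_legacy_axis is made up for illustration, and it assumes, as the patch does, that ranks below 4 are treated as 4-dimensional.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Illustrative only: mirrors the "spatial_size - spatial_axis - 1 + 2" formula from the diff.
int64_t to_legacy_axis(int64_t axis, size_t rank) {
    if (axis < 0)
        axis += static_cast<int64_t>(rank);               // normalize negative axes first
    if (axis < 2)
        return axis;                                      // batch and feature keep their positions
    const int64_t spatial_size = static_cast<int64_t>(std::max<size_t>(rank, 4)) - 2;
    return spatial_size - (axis - 2) - 1 + 2;             // reverse the spatial dimensions
}

int main() {
    assert(to_legacy_axis(1, 4) == 1);   // feature stays feature
    assert(to_legacy_axis(2, 4) == 3);   // bfyx: y -> index 3 in legacy bfxy order
    assert(to_legacy_axis(3, 4) == 2);   // bfyx: x -> index 2 in legacy bfxy order
    assert(to_legacy_axis(2, 5) == 4);   // bfzyx: z -> index 4 in legacy bfxyz order
    assert(to_legacy_axis(-1, 5) == 2);  // negative axis: x in bfzyx
    return 0;
}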