From ed8c9d6f9ac76e82081bba65b6d3d38a141c1cd9 Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Wed, 16 Mar 2022 17:16:29 +0300 Subject: [PATCH] CPU Plugin refactoring: class names (#10639) --- docs/IE_PLUGIN_DG/ExecutableNetwork.md | 2 +- docs/IE_PLUGIN_DG/Plugin.md | 2 +- samples/cpp/benchmark_app/main.cpp | 4 +- .../convert_strided_slice_to_crop.cpp | 2 +- .../snippets/src/pass/collapse_subgraph.cpp | 2 +- src/core/tests/runtime/ie/unit_test.manifest | 4 +- .../dev_api/threading/ie_executor_manager.hpp | 2 +- .../intel_cpu/src/async_infer_request.cpp | 10 +- .../intel_cpu/src/async_infer_request.h | 10 +- .../intel_cpu/src/cache/multi_cache.cpp | 8 +- src/plugins/intel_cpu/src/cpu_memory.cpp | 40 +- src/plugins/intel_cpu/src/cpu_memory.h | 38 +- src/plugins/intel_cpu/src/cpu_shape.cpp | 6 +- src/plugins/intel_cpu/src/cpu_types.cpp | 534 ++++++------- src/plugins/intel_cpu/src/cpu_types.h | 4 +- .../{descriptor.cpp => dnnl_descriptor.cpp} | 70 +- .../src/{descriptor.h => dnnl_descriptor.h} | 36 +- ...ion_utils.cpp => dnnl_extension_utils.cpp} | 31 +- ...tension_utils.h => dnnl_extension_utils.h} | 4 +- src/plugins/intel_cpu/src/edge.cpp | 80 +- src/plugins/intel_cpu/src/edge.h | 46 +- .../src/emitters/jit_snippets_emitters.hpp | 2 +- src/plugins/intel_cpu/src/exec_network.cpp | 143 ++-- src/plugins/intel_cpu/src/exec_network.h | 26 +- src/plugins/intel_cpu/src/extension.cpp | 14 +- src/plugins/intel_cpu/src/extension.h | 2 +- src/plugins/intel_cpu/src/extension_mngr.cpp | 34 +- src/plugins/intel_cpu/src/extension_mngr.h | 9 +- src/plugins/intel_cpu/src/graph.cpp | 294 ++++---- src/plugins/intel_cpu/src/graph.h | 84 ++- src/plugins/intel_cpu/src/graph_dumper.cpp | 24 +- src/plugins/intel_cpu/src/graph_dumper.h | 4 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 467 ++++++------ src/plugins/intel_cpu/src/graph_optimizer.h | 52 +- src/plugins/intel_cpu/src/infer_request.cpp | 120 +-- src/plugins/intel_cpu/src/infer_request.h | 46 +- .../src/memory_desc/blocked_memory_desc.cpp | 6 +- .../memory_desc/cpu_blocked_memory_desc.cpp | 6 +- .../src/memory_desc/cpu_memory_desc.h | 5 +- .../src/memory_desc/cpu_memory_desc_utils.cpp | 3 +- .../src/memory_desc/cpu_memory_desc_utils.h | 8 +- .../memory_desc/dnnl_blocked_memory_desc.cpp | 41 +- .../memory_desc/dnnl_blocked_memory_desc.h | 8 +- .../src/memory_desc/dnnl_memory_desc.cpp | 8 +- .../src/memory_desc/dnnl_memory_desc.h | 6 +- src/plugins/intel_cpu/src/memory_state.cpp | 4 +- src/plugins/intel_cpu/src/memory_state.h | 6 +- .../intel_cpu/src/mkldnn/iml_type_mapper.cpp | 10 +- .../intel_cpu/src/mkldnn/iml_type_mapper.h | 1 - src/plugins/intel_cpu/src/node.cpp | 301 ++++---- src/plugins/intel_cpu/src/node.h | 140 ++-- .../intel_cpu/src/nodes/adaptive_pooling.cpp | 39 +- .../intel_cpu/src/nodes/adaptive_pooling.h | 8 +- .../intel_cpu/src/nodes/batch_to_space.cpp | 31 +- .../intel_cpu/src/nodes/batch_to_space.h | 6 +- src/plugins/intel_cpu/src/nodes/bin_conv.cpp | 107 +-- src/plugins/intel_cpu/src/nodes/bin_conv.h | 8 +- src/plugins/intel_cpu/src/nodes/broadcast.cpp | 41 +- src/plugins/intel_cpu/src/nodes/broadcast.h | 7 +- src/plugins/intel_cpu/src/nodes/bucketize.cpp | 31 +- src/plugins/intel_cpu/src/nodes/bucketize.h | 6 +- .../intel_cpu/src/nodes/color_convert.cpp | 84 +-- .../intel_cpu/src/nodes/color_convert.h | 18 +- .../src/nodes/common/blocked_desc_creator.cpp | 8 +- .../src/nodes/common/cpu_convert.cpp | 102 +-- .../intel_cpu/src/nodes/common/cpu_convert.h | 6 + .../intel_cpu/src/nodes/common/cpu_memcpy.h | 6 + 
.../src/nodes/common/dnnl_executor.cpp | 7 +- .../src/nodes/common/dnnl_executor.h | 2 +- .../intel_cpu/src/nodes/common/fp16_utils.h | 5 + .../src/nodes/common/permute_kernel.cpp | 9 +- .../intel_cpu/src/nodes/common/softmax.cpp | 7 +- .../intel_cpu/src/nodes/common/softmax.h | 5 + .../src/nodes/common/tile_broadcast_utils.cpp | 19 +- .../src/nodes/common/tile_broadcast_utils.h | 6 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 54 +- src/plugins/intel_cpu/src/nodes/concat.h | 6 +- src/plugins/intel_cpu/src/nodes/conv.cpp | 287 +++---- src/plugins/intel_cpu/src/nodes/conv.h | 28 +- src/plugins/intel_cpu/src/nodes/convert.cpp | 39 +- src/plugins/intel_cpu/src/nodes/convert.h | 12 +- .../src/nodes/ctc_greedy_decoder.cpp | 27 +- .../intel_cpu/src/nodes/ctc_greedy_decoder.h | 6 +- .../src/nodes/ctc_greedy_decoder_seq_len.cpp | 27 +- .../src/nodes/ctc_greedy_decoder_seq_len.h | 6 +- src/plugins/intel_cpu/src/nodes/ctc_loss.cpp | 25 +- src/plugins/intel_cpu/src/nodes/ctc_loss.h | 6 +- src/plugins/intel_cpu/src/nodes/cum_sum.cpp | 40 +- src/plugins/intel_cpu/src/nodes/cum_sum.h | 10 +- src/plugins/intel_cpu/src/nodes/deconv.cpp | 187 ++--- src/plugins/intel_cpu/src/nodes/deconv.h | 30 +- src/plugins/intel_cpu/src/nodes/def_conv.cpp | 47 +- src/plugins/intel_cpu/src/nodes/def_conv.h | 6 +- .../intel_cpu/src/nodes/depth_to_space.cpp | 46 +- .../intel_cpu/src/nodes/depth_to_space.h | 8 +- .../intel_cpu/src/nodes/detection_output.cpp | 48 +- .../intel_cpu/src/nodes/detection_output.h | 6 +- src/plugins/intel_cpu/src/nodes/dft.cpp | 38 +- src/plugins/intel_cpu/src/nodes/dft.h | 8 +- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 704 ++++++++++-------- src/plugins/intel_cpu/src/nodes/eltwise.h | 36 +- .../src/nodes/embedding_bag_offset_sum.cpp | 37 +- .../src/nodes/embedding_bag_offset_sum.h | 6 +- .../src/nodes/embedding_bag_packed_sum.cpp | 37 +- .../src/nodes/embedding_bag_packed_sum.h | 6 +- .../intel_cpu/src/nodes/embedding_bag_sum.cpp | 21 +- .../intel_cpu/src/nodes/embedding_bag_sum.h | 8 +- .../src/nodes/embedding_segments_sum.cpp | 41 +- .../src/nodes/embedding_segments_sum.h | 6 +- ...xperimental_detectron_detection_output.cpp | 31 +- .../experimental_detectron_detection_output.h | 7 +- ...ectron_generate_proposals_single_image.cpp | 41 +- ...etectron_generate_proposals_single_image.h | 8 +- ...erimental_detectron_priorgridgenerator.cpp | 25 +- ...xperimental_detectron_priorgridgenerator.h | 6 +- ...rimental_detectron_roifeatureextractor.cpp | 22 +- ...perimental_detectron_roifeatureextractor.h | 6 +- .../nodes/experimental_detectron_topkrois.cpp | 23 +- .../nodes/experimental_detectron_topkrois.h | 6 +- .../src/nodes/extract_image_patches.cpp | 46 +- .../src/nodes/extract_image_patches.h | 6 +- .../intel_cpu/src/nodes/fake_quantize.cpp | 103 +-- .../intel_cpu/src/nodes/fake_quantize.h | 32 +- .../intel_cpu/src/nodes/fullyconnected.cpp | 108 +-- .../intel_cpu/src/nodes/fullyconnected.h | 10 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 41 +- src/plugins/intel_cpu/src/nodes/gather.h | 6 +- .../intel_cpu/src/nodes/gather_elements.cpp | 39 +- .../intel_cpu/src/nodes/gather_elements.h | 6 +- src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 49 +- src/plugins/intel_cpu/src/nodes/gather_nd.h | 18 +- .../intel_cpu/src/nodes/gather_tree.cpp | 27 +- src/plugins/intel_cpu/src/nodes/gather_tree.h | 6 +- src/plugins/intel_cpu/src/nodes/generic.cpp | 76 +- src/plugins/intel_cpu/src/nodes/generic.h | 11 +- src/plugins/intel_cpu/src/nodes/grn.cpp | 23 +- src/plugins/intel_cpu/src/nodes/grn.h | 6 +- 
src/plugins/intel_cpu/src/nodes/if.cpp | 44 +- src/plugins/intel_cpu/src/nodes/if.h | 26 +- src/plugins/intel_cpu/src/nodes/input.cpp | 79 +- src/plugins/intel_cpu/src/nodes/input.h | 18 +- .../intel_cpu/src/nodes/interpolate.cpp | 113 +-- src/plugins/intel_cpu/src/nodes/interpolate.h | 8 +- src/plugins/intel_cpu/src/nodes/list.hpp | 71 -- .../intel_cpu/src/nodes/log_softmax.cpp | 27 +- src/plugins/intel_cpu/src/nodes/log_softmax.h | 8 +- src/plugins/intel_cpu/src/nodes/lrn.cpp | 43 +- src/plugins/intel_cpu/src/nodes/lrn.h | 8 +- .../intel_cpu/src/nodes/mathematics.cpp | 155 ++-- src/plugins/intel_cpu/src/nodes/mathematics.h | 8 +- src/plugins/intel_cpu/src/nodes/matmul.cpp | 88 ++- src/plugins/intel_cpu/src/nodes/matmul.h | 10 +- .../intel_cpu/src/nodes/matrix_nms.cpp | 35 +- src/plugins/intel_cpu/src/nodes/matrix_nms.h | 6 +- src/plugins/intel_cpu/src/nodes/memory.cpp | 90 +-- src/plugins/intel_cpu/src/nodes/memory.hpp | 66 +- .../intel_cpu/src/nodes/multiclass_nms.cpp | 39 +- .../intel_cpu/src/nodes/multiclass_nms.hpp | 6 +- src/plugins/intel_cpu/src/nodes/mvn.cpp | 77 +- src/plugins/intel_cpu/src/nodes/mvn.h | 8 +- .../src/nodes/non_max_suppression.cpp | 46 +- .../intel_cpu/src/nodes/non_max_suppression.h | 6 +- src/plugins/intel_cpu/src/nodes/non_zero.cpp | 35 +- src/plugins/intel_cpu/src/nodes/non_zero.h | 8 +- src/plugins/intel_cpu/src/nodes/normalize.cpp | 61 +- src/plugins/intel_cpu/src/nodes/normalize.h | 14 +- src/plugins/intel_cpu/src/nodes/one_hot.cpp | 33 +- src/plugins/intel_cpu/src/nodes/one_hot.h | 8 +- src/plugins/intel_cpu/src/nodes/pad.cpp | 58 +- src/plugins/intel_cpu/src/nodes/pad.h | 22 +- src/plugins/intel_cpu/src/nodes/pooling.cpp | 73 +- src/plugins/intel_cpu/src/nodes/pooling.h | 8 +- src/plugins/intel_cpu/src/nodes/priorbox.cpp | 34 +- src/plugins/intel_cpu/src/nodes/priorbox.h | 6 +- .../src/nodes/priorbox_clustered.cpp | 31 +- .../intel_cpu/src/nodes/priorbox_clustered.h | 6 +- src/plugins/intel_cpu/src/nodes/proposal.cpp | 29 +- src/plugins/intel_cpu/src/nodes/proposal.h | 6 +- .../intel_cpu/src/nodes/psroi_pooling.cpp | 37 +- .../intel_cpu/src/nodes/psroi_pooling.h | 6 +- src/plugins/intel_cpu/src/nodes/range.cpp | 36 +- src/plugins/intel_cpu/src/nodes/range.h | 6 +- src/plugins/intel_cpu/src/nodes/reduce.cpp | 397 +++++----- src/plugins/intel_cpu/src/nodes/reduce.h | 10 +- src/plugins/intel_cpu/src/nodes/reference.cpp | 39 +- src/plugins/intel_cpu/src/nodes/reference.h | 6 +- .../intel_cpu/src/nodes/region_yolo.cpp | 31 +- src/plugins/intel_cpu/src/nodes/region_yolo.h | 6 +- src/plugins/intel_cpu/src/nodes/reorder.cpp | 87 ++- src/plugins/intel_cpu/src/nodes/reorder.h | 14 +- .../intel_cpu/src/nodes/reorg_yolo.cpp | 25 +- src/plugins/intel_cpu/src/nodes/reorg_yolo.h | 6 +- src/plugins/intel_cpu/src/nodes/reshape.cpp | 32 +- src/plugins/intel_cpu/src/nodes/reshape.h | 6 +- .../intel_cpu/src/nodes/reverse_sequence.cpp | 23 +- .../intel_cpu/src/nodes/reverse_sequence.h | 6 +- src/plugins/intel_cpu/src/nodes/rnn.cpp | 99 ++- src/plugins/intel_cpu/src/nodes/rnn.h | 6 +- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 41 +- src/plugins/intel_cpu/src/nodes/roi_align.h | 8 +- .../intel_cpu/src/nodes/roi_pooling.cpp | 59 +- src/plugins/intel_cpu/src/nodes/roi_pooling.h | 12 +- src/plugins/intel_cpu/src/nodes/roll.cpp | 35 +- src/plugins/intel_cpu/src/nodes/roll.h | 6 +- .../intel_cpu/src/nodes/scatter_update.cpp | 59 +- .../intel_cpu/src/nodes/scatter_update.h | 6 +- src/plugins/intel_cpu/src/nodes/select.cpp | 35 +- src/plugins/intel_cpu/src/nodes/select.h | 6 +- 
src/plugins/intel_cpu/src/nodes/shapeof.cpp | 27 +- src/plugins/intel_cpu/src/nodes/shapeof.h | 8 +- .../intel_cpu/src/nodes/shuffle_channels.cpp | 41 +- .../intel_cpu/src/nodes/shuffle_channels.h | 8 +- src/plugins/intel_cpu/src/nodes/softmax.cpp | 40 +- src/plugins/intel_cpu/src/nodes/softmax.h | 6 +- .../intel_cpu/src/nodes/space_to_batch.cpp | 31 +- .../intel_cpu/src/nodes/space_to_batch.h | 6 +- .../intel_cpu/src/nodes/space_to_depth.cpp | 56 +- .../intel_cpu/src/nodes/space_to_depth.h | 6 +- src/plugins/intel_cpu/src/nodes/split.cpp | 57 +- src/plugins/intel_cpu/src/nodes/split.h | 6 +- .../intel_cpu/src/nodes/strided_slice.cpp | 53 +- .../intel_cpu/src/nodes/strided_slice.h | 8 +- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 46 +- src/plugins/intel_cpu/src/nodes/subgraph.h | 14 +- .../intel_cpu/src/nodes/tensoriterator.cpp | 98 +-- .../intel_cpu/src/nodes/tensoriterator.h | 24 +- src/plugins/intel_cpu/src/nodes/tile.cpp | 39 +- src/plugins/intel_cpu/src/nodes/tile.h | 6 +- src/plugins/intel_cpu/src/nodes/topk.cpp | 69 +- src/plugins/intel_cpu/src/nodes/topk.h | 8 +- src/plugins/intel_cpu/src/nodes/transpose.cpp | 60 +- src/plugins/intel_cpu/src/nodes/transpose.h | 20 +- src/plugins/intel_cpu/src/nodes_factory.cpp | 183 ++--- .../intel_cpu/src/normalize_preprocess.cpp | 9 +- src/plugins/intel_cpu/src/plugin.cpp | 31 +- src/plugins/intel_cpu/src/plugin.h | 2 +- src/plugins/intel_cpu/src/primitive.cpp | 16 +- src/plugins/intel_cpu/src/primitive.h | 6 +- src/plugins/intel_cpu/src/serialize.cpp | 2 +- src/plugins/intel_cpu/src/serialize.h | 4 +- src/plugins/intel_cpu/src/utils/blob_dump.cpp | 4 +- src/plugins/intel_cpu/src/utils/blob_dump.h | 8 +- .../intel_cpu/src/utils/node_dumper.cpp | 10 +- src/plugins/intel_cpu/src/utils/node_dumper.h | 8 +- .../rt_info/memory_formats_attribute.cpp | 27 +- .../rt_info/memory_formats_attribute.hpp | 41 +- .../utils/shape_inference/shape_inference.cpp | 70 +- .../utils/shape_inference/shape_inference.hpp | 14 +- .../shape_inference/static_dimension.cpp | 8 +- .../shape_inference/static_dimension.hpp | 6 +- .../utils/shape_inference/static_shape.cpp | 47 +- .../utils/shape_inference/static_shape.hpp | 10 +- src/plugins/intel_cpu/src/utils/verbose.cpp | 4 +- src/plugins/intel_cpu/src/utils/verbose.h | 4 +- src/plugins/intel_cpu/src/weights_cache.cpp | 40 +- src/plugins/intel_cpu/src/weights_cache.hpp | 48 +- .../ngraph_reader/strided_slice_tests.cpp | 4 +- .../serialization/single_layer/mvn.cpp | 4 +- .../single_layer_tests/cum_sum.cpp | 16 +- .../single_layer_tests/dft.cpp | 8 +- .../single_layer_tests/mvn.cpp | 4 +- .../single_layer_tests/roll.cpp | 24 +- .../single_layer_tests/select.cpp | 4 +- .../single_layer_tests/strided_slice.cpp | 2 +- .../cpu/single_layer_tests/gru_sequence.cpp | 4 +- .../cpu/single_layer_tests/lstm_sequence.cpp | 4 +- .../plugin/cpu/single_layer_tests/matmul.cpp | 2 +- .../src/add_convert_to_reorder.cpp | 2 +- .../subgraph_tests/src/gather_add_avgpool.cpp | 2 +- .../plugin/cpu/test_utils/cpu_test_utils.cpp | 8 +- src/tests/ie_tsan.supp | 8 +- .../unit/cpu/mkldnn_memory_desc_test.cpp | 72 +- src/tests/unit/cpu/mkldnn_zero_dims_test.cpp | 8 +- .../unit/cpu/nodes/reorder_node_test.cpp | 36 +- .../assign_shape_inference.cpp | 2 + .../batch_to_space_shape_inference.cpp | 1 + .../broadcast_shape_inference.cpp | 1 + .../shape_inference_test/bucketize_test.cpp | 3 +- .../convolution_shape_inference.cpp | 1 + ...greedy_decoder_seq_len_shape_inference.cpp | 1 + .../ctc_greedy_decoder_shape_inference.cpp | 1 + 
.../ctc_loss_shape_inference.cpp | 1 + .../depth_to_space_shape_inference.cpp | 8 +- .../detection_output_shape_inference.cpp | 1 + .../cpu/shape_inference_test/einsum_test.cpp | 13 +- .../cpu/shape_inference_test/elementwises.cpp | 1 + .../embedding_segments_sum_test.cpp | 1 + .../embeddingbag_offsets_sum_test.cpp | 5 +- ...ctron_detection_output_shape_inference.cpp | 1 + ...perimental_detectron_generate_proposal.cpp | 11 +- ...n_prior_grid_generator_shape_inference.cpp | 1 + ...mental_detectron_roi_feature_extractor.cpp | 1 + ...tal_detectron_topkrois_shape_inference.cpp | 1 + .../extract_image_patches_shape_inference.cpp | 1 + .../fft_base_shape_inference.cpp | 1 + .../gather_elements_shape_inference.cpp | 1 + .../gather_shape_inference.cpp | 1 + .../gather_tree_shape_inference.cpp | 1 + .../interpolate_shape_inference.cpp | 1 + .../lstm_cell_shape_inference.cpp | 1 + .../make_shape_inference.cpp | 1 + .../matmul_shape_inference.cpp | 1 + .../one_hot_shape_inference.cpp | 1 + .../cpu/shape_inference_test/pad_test.cpp | 11 +- .../cpu/shape_inference_test/proposal.cpp | 17 +- .../cpu/shape_inference_test/range_test.cpp | 17 +- .../read_value_shape_inference.cpp | 1 + .../cpu/shape_inference_test/reduce_test.cpp | 2 +- .../shape_inference_test/region_yolo_test.cpp | 5 +- .../shape_inference_test/reorg_yolo_test.cpp | 3 +- .../reverse_sequence_shape_inference.cpp | 1 + .../roi_align_shape_inference.cpp | 9 +- .../roll_shape_inference.cpp | 18 +- ...catter_elements_update_shape_inference.cpp | 1 + .../scatter_nd_shape_inference.cpp | 1 + .../select_shape_inference.cpp | 1 + .../shape_inference_test/shape_node_tests.cpp | 2 +- .../shuffle_channels_shape_inference.cpp | 1 + .../space_to_batch_shape_inference.cpp | 1 + .../space_to_depth_shape_inference.cpp | 8 +- .../cpu/shape_inference_test/split_tests.cpp | 1 + .../strided_slice_test.cpp | 29 +- .../tile_shape_inference.cpp | 1 + .../cpu/shape_inference_test/topk_test.cpp | 1 + .../unit/cpu/shape_inference_test/utils.hpp | 18 +- .../variadic_split_tests.cpp | 1 + .../openvino/tools/benchmark/main.py | 2 +- tools/cross_check_tool/README.md | 2 +- .../openvino/tools/cross_check_tool/utils.py | 2 +- 329 files changed, 5649 insertions(+), 4849 deletions(-) rename src/plugins/intel_cpu/src/{descriptor.cpp => dnnl_descriptor.cpp} (65%) rename src/plugins/intel_cpu/src/{descriptor.h => dnnl_descriptor.h} (72%) rename src/plugins/intel_cpu/src/{extension_utils.cpp => dnnl_extension_utils.cpp} (79%) rename src/plugins/intel_cpu/src/{extension_utils.h => dnnl_extension_utils.h} (97%) delete mode 100644 src/plugins/intel_cpu/src/nodes/list.hpp diff --git a/docs/IE_PLUGIN_DG/ExecutableNetwork.md b/docs/IE_PLUGIN_DG/ExecutableNetwork.md index 5f703bcd880..0a7f7c7182a 100644 --- a/docs/IE_PLUGIN_DG/ExecutableNetwork.md +++ b/docs/IE_PLUGIN_DG/ExecutableNetwork.md @@ -38,7 +38,7 @@ The implementation `CompileNetwork` is fully device-specific. The function accepts a const shared pointer to `ngraph::Function` object and performs the following steps: 1. Applies ngraph passes using `TransformNetwork` function, which defines plugin-specific conversion pipeline. To support low precision inference, the pipeline can include Low Precision Transformations. These transformations are usually hardware specific. You can find how to use and configure Low Precisions Transformations in [Low Precision Transformations](@ref openvino_docs_IE_DG_lpt) guide. -2. Maps the transformed graph to a backend specific graph representation (for example, to MKLDNN graph for Intel CPU). 
+2. Maps the transformed graph to a backend specific graph representation (for example, to CPU plugin internal graph representation). 3. Allocates and fills memory for graph weights, backend specific memory handles and so on. @snippet src/template_executable_network.cpp executable_network:map_graph diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index 410bed856c9..a7dc7ecfd2b 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -2,7 +2,7 @@ Inference Engine Plugin usually represents a wrapper around a backend. Backends can be: - OpenCL-like backend (e.g. clDNN library) for GPU devices. -- MKLDNN backend for Intel CPU devices. +- oneDNN backend for Intel CPU devices. - NVIDIA cuDNN for NVIDIA GPUs. The responsibility of Inference Engine Plugin: diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index 4604d919fea..a609d951df4 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -210,9 +210,9 @@ int main(int argc, char* argv[]) { ov::Core core; if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) { - // CPU (MKLDNN) extensions is loaded as a shared library + // CPU plugin extensions is loaded as a shared library core.add_extension(FLAGS_l); - slog::info << "CPU (MKLDNN) extensions is loaded " << FLAGS_l << slog::endl; + slog::info << "CPU plugin extensions is loaded " << FLAGS_l << slog::endl; } // Load clDNN Extensions diff --git a/src/common/legacy/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp b/src/common/legacy/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp index c260625652a..adea1d8c3e7 100644 --- a/src/common/legacy/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp +++ b/src/common/legacy/src/transformations/convert_opset1_to_legacy/convert_strided_slice_to_crop.cpp @@ -202,7 +202,7 @@ ngraph::pass::ConvertStridedSliceToCropMatcher::ConvertStridedSliceToCropMatcher } auto data_node_shape = data_output.get_shape(); - // MKLDNN: "Crop supports only 2d, 4d and 5d blobs." 
+ // Crop supports only 2d, 4d and 5d blobs if (data_node_shape.size() != 2 && data_node_shape.size() != 4 && data_node_shape.size() != 5) { return false; } diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 0586d456eec..4491739e099 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -137,7 +137,7 @@ auto get_num_result_children(const std::shared_ptr &node) -> size_t } return result; } -// Need to update tensor name manually, since MKLDNNGraph::Replicate() looks at input.get_tensor().get_name(); +// Need to update tensor name manually, since intel_cpu::Graph::Replicate() looks at input.get_tensor().get_name(); // If subgraph->get_output_size() == 1, then the name will be restored correctly from the node name auto update_out_tensor_name(std::shared_ptr &subgraph) -> void { bool not_set = true; diff --git a/src/core/tests/runtime/ie/unit_test.manifest b/src/core/tests/runtime/ie/unit_test.manifest index a41ff45fa3a..953247df3e0 100644 --- a/src/core/tests/runtime/ie/unit_test.manifest +++ b/src/core/tests/runtime/ie/unit_test.manifest @@ -209,7 +209,7 @@ onnx_model_eye_like_dyn_rank # Constant network -# MKLDNNGraph::CreateGraph: No inputs for the topology +# intel_cpu::Graph::CreateGraph: No inputs for the topology onnx_size_op_single onnx_size_op_graph_end onnx_size_op_graph_middle @@ -496,7 +496,7 @@ relu_4Dbackprop # data [] doesn't exist parameter_as_output -# MKLDNNGraph::CreateGraph: No inputs for the topology +# intel_cpu::Graph::CreateGraph: No inputs for the topology range_v0_int32 range_v0_float32 range_v4_int32 diff --git a/src/inference/dev_api/threading/ie_executor_manager.hpp b/src/inference/dev_api/threading/ie_executor_manager.hpp index 35d9c6dd8b0..089766a26e0 100644 --- a/src/inference/dev_api/threading/ie_executor_manager.hpp +++ b/src/inference/dev_api/threading/ie_executor_manager.hpp @@ -25,7 +25,7 @@ namespace InferenceEngine { * @brief Interface for tasks execution manager. * This is global point for getting task executor objects by string id. * It's necessary in multiple asynchronous requests for having unique executors to avoid oversubscription. - * E.g. There 2 task executors for CPU device: one - in FPGA, another - in MKLDNN. Parallel execution both of them leads + * E.g. There 2 task executors for CPU device: one - in FPGA, another - in OneDNN. Parallel execution both of them leads * to not optimal CPU usage. More efficient to run the corresponding tasks one by one via single executor. 
* @ingroup ie_dev_api_threading */ diff --git a/src/plugins/intel_cpu/src/async_infer_request.cpp b/src/plugins/intel_cpu/src/async_infer_request.cpp index 0682138129e..b4768ae72d7 100644 --- a/src/plugins/intel_cpu/src/async_infer_request.cpp +++ b/src/plugins/intel_cpu/src/async_infer_request.cpp @@ -5,13 +5,13 @@ #include "async_infer_request.h" #include -ov::intel_cpu::MKLDNNAsyncInferRequest::MKLDNNAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest, - const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) +ov::intel_cpu::AsyncInferRequest::AsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr& inferRequest, + const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, + const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) : InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor) { - static_cast(inferRequest.get())->SetAsyncRequest(this); + static_cast(inferRequest.get())->SetAsyncRequest(this); } -ov::intel_cpu::MKLDNNAsyncInferRequest::~MKLDNNAsyncInferRequest() { +ov::intel_cpu::AsyncInferRequest::~AsyncInferRequest() { StopAndWait(); } diff --git a/src/plugins/intel_cpu/src/async_infer_request.h b/src/plugins/intel_cpu/src/async_infer_request.h index a7522fdc615..8639cfc981e 100644 --- a/src/plugins/intel_cpu/src/async_infer_request.h +++ b/src/plugins/intel_cpu/src/async_infer_request.h @@ -12,12 +12,12 @@ namespace ov { namespace intel_cpu { -class MKLDNNAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { +class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { public: - MKLDNNAsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest, - const InferenceEngine::ITaskExecutor::Ptr &taskExecutor, - const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor); - ~MKLDNNAsyncInferRequest(); + AsyncInferRequest(const InferenceEngine::IInferRequestInternal::Ptr &inferRequest, + const InferenceEngine::ITaskExecutor::Ptr &taskExecutor, + const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor); + ~AsyncInferRequest(); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/cache/multi_cache.cpp b/src/plugins/intel_cpu/src/cache/multi_cache.cpp index 172420ff73c..e2fe38fc0ae 100644 --- a/src/plugins/intel_cpu/src/cache/multi_cache.cpp +++ b/src/plugins/intel_cpu/src/cache/multi_cache.cpp @@ -4,6 +4,10 @@ #include "multi_cache.h" -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { -std::atomic_size_t MultiCache::_typeIdCounter{0}; \ No newline at end of file +std::atomic_size_t MultiCache::_typeIdCounter{0}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index a54d5549416..bf127acf9b0 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -34,12 +34,12 @@ namespace { } } // namespace -MKLDNNMemory::MKLDNNMemory(const mkldnn::engine& eng) : +Memory::Memory(const mkldnn::engine& eng) : eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this) {} -MKLDNNMemory::MKLDNNMemory(const mkldnn::engine& eng, std::unique_ptr mngr) : +Memory::Memory(const mkldnn::engine& eng, std::unique_ptr mngr) : eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this) {} -size_t MKLDNNMemory::GetSize() const { +size_t Memory::GetSize() const { auto size = 
getDesc().getCurrentMemSize(); if (size == MemoryDesc::UNDEFINED_SIZE) { IE_THROW() << "Can't get memory size for undefined shape"; @@ -47,8 +47,8 @@ size_t MKLDNNMemory::GetSize() const { return size; } -void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bool pads_zeroing) { - // MKLDNN accepts not a const data, probably need to remove some level of consteness in a call stack +void Memory::Create(const mkldnn::memory::desc& desc, const void *data, bool pads_zeroing) { + // OneDNN accepts not a const data, probably need to remove some level of consteness in a call stack // ======================== // Equivalent of constructor memory(const primitive_desc &desc, void *hdl) @@ -64,11 +64,11 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo } } -void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { +void Memory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { Create(desc.clone(), data, pads_zeroing); } -void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { +void Memory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { pMemDesc = desc; size_t memSize = MemoryDesc::UNDEFINED_SIZE; @@ -93,8 +93,8 @@ void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroin } } -void MKLDNNMemory::SetData(const MKLDNNMemory& src, bool ftz) const { - MKLDNNReorderNode::reorderData(src, *this); +void Memory::SetData(const Memory& src, bool ftz) const { + node::Reorder::reorderData(src, *this); if (ftz && src.GetDataType() == memory::data_type::f32 @@ -109,13 +109,13 @@ void MKLDNNMemory::SetData(const MKLDNNMemory& src, bool ftz) const { } } -void MKLDNNMemory::FillZero() { +void Memory::FillZero() { void* dataPtr = GetData(); if (dataPtr != nullptr) memset(dataPtr, 0, getDesc().getMaxMemSize()); } -void *MKLDNNMemory::GetPtr() const { +void *Memory::GetPtr() const { auto ptr = static_cast(GetData()); const mkldnn_memory_desc_t md = prim->get_desc().data; mkldnn::impl::memory_desc_wrapper wrapper(md); @@ -123,7 +123,7 @@ void *MKLDNNMemory::GetPtr() const { return ptr; } -void MKLDNNMemory::redefineDesc(MemoryDescPtr desc) { +void Memory::redefineDesc(MemoryDescPtr desc) { if (!desc->hasDefinedMaxSize()) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } @@ -132,27 +132,27 @@ void MKLDNNMemory::redefineDesc(MemoryDescPtr desc) { } template<> -DnnlMemoryDescPtr MKLDNNMemory::GetDescWithType() const { +DnnlMemoryDescPtr Memory::GetDescWithType() const { return MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc); } -void MKLDNNMemory::setDataHandle(void *data) { +void Memory::setDataHandle(void *data) { size_t maxMemSize = pMemDesc->hasDefinedMaxSize() ? 
pMemDesc->getMaxMemSize() : 0; mgrHandle->setExtBuff(data, maxMemSize); prim->set_data_handle(mgrHandle->getRawPtr()); // for pads zeroing, to preserve mkldnn::memory::set_data_handle behaviour } -void MKLDNNMemory::update() { +void Memory::update() { if (isAllocated()) { prim->set_data_handle_no_pads_proc(mgrHandle->getRawPtr()); } } -void MKLDNNMemory::Create(const MemoryDesc &desc, DnnlMemoryMngrPtr memMgr) { +void Memory::Create(const MemoryDesc &desc, DnnlMemoryMngrPtr memMgr) { Create(desc.clone(), memMgr); } -void MKLDNNMemory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) { +void Memory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) { mgrHandle = DnnlMemMngrHandle(memMgr, this); bool memAllocated = mgrHandle->getRawPtr(); @@ -160,7 +160,7 @@ void MKLDNNMemory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) { } template<> -BlockedMemoryDescPtr MKLDNNMemory::GetDescWithType() const { +BlockedMemoryDescPtr Memory::GetDescWithType() const { return MemoryDescUtils::convertToBlockedMemoryDesc(pMemDesc); } @@ -221,13 +221,13 @@ bool DnnlMemoryMngr::hasExtBuffer() const noexcept { return _pMemMngr->hasExtBuffer(); } -void DnnlMemoryMngr::registerMemory(MKLDNNMemory* memPtr) { +void DnnlMemoryMngr::registerMemory(Memory* memPtr) { if (memPtr) { _setMemPtrs.insert(memPtr); } } -void DnnlMemoryMngr::unregisterMemory(MKLDNNMemory* memPtr) { +void DnnlMemoryMngr::unregisterMemory(Memory* memPtr) { if (memPtr) { _setMemPtrs.erase(memPtr); } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index 3887f1ae1ac..8196bd1d32e 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -6,7 +6,7 @@ #include "ie_layouts.h" #include "memory_desc/cpu_memory_desc.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include "memory_desc/cpu_memory_desc_utils.h" #include #include @@ -23,7 +23,7 @@ /** * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level. * - * MKLDNNMemory is an abstraction of some real tensor which contains some data. As in short it's a pair of + * Memory is an abstraction of some real tensor which contains some data. As in short it's a pair of * memory descriptor and raw buffer handler to contains data. In case of system memory raw buffer it's simple * "void*" on some system memory buffer. 
* @@ -32,7 +32,7 @@ namespace ov { namespace intel_cpu { -class MKLDNNMemory; +class Memory; /** * @interface IMemoryMngr @@ -100,14 +100,14 @@ public: void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; - void registerMemory(MKLDNNMemory* memPtr); - void unregisterMemory(MKLDNNMemory* memPtr); + void registerMemory(Memory* memPtr); + void unregisterMemory(Memory* memPtr); private: void notifyUpdate(); private: - std::unordered_set _setMemPtrs; + std::unordered_set _setMemPtrs; std::unique_ptr _pMemMngr; }; @@ -116,7 +116,7 @@ using DnnlMemoryMngrCPtr = std::shared_ptr; class DnnlMemMngrHandle { public: - DnnlMemMngrHandle(DnnlMemoryMngrPtr pMgr, MKLDNNMemory* pMem) : _pMgr(pMgr), _pMem(pMem) { + DnnlMemMngrHandle(DnnlMemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) { if (_pMgr) { _pMgr->registerMemory(_pMem); } @@ -151,19 +151,19 @@ public: private: DnnlMemoryMngrPtr _pMgr = nullptr; - MKLDNNMemory* _pMem = nullptr; + Memory* _pMem = nullptr; }; -class MKLDNNMemory { +class Memory { public: - explicit MKLDNNMemory(const mkldnn::engine& eng); - MKLDNNMemory(const mkldnn::engine& eng, std::unique_ptr mngr); + explicit Memory(const mkldnn::engine& eng); + Memory(const mkldnn::engine& eng, std::unique_ptr mngr); - MKLDNNMemory(const MKLDNNMemory&) = delete; - MKLDNNMemory& operator= (const MKLDNNMemory&) = delete; + Memory(const Memory&) = delete; + Memory& operator= (const Memory&) = delete; - MKLDNNMemory(MKLDNNMemory&&) = delete; - MKLDNNMemory& operator= (MKLDNNMemory&&) = delete; + Memory(Memory&&) = delete; + Memory& operator= (Memory&&) = delete; mkldnn::memory GetPrimitive() const { if (isAllocated()) { @@ -213,7 +213,7 @@ public: void* GetPtr() const; mkldnn::memory::data_type GetDataType() const { - return MKLDNNExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); + return DnnlExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); } size_t GetSize() const; @@ -233,7 +233,7 @@ public: // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. 
void redefineDesc(MemoryDescPtr desc); - void SetData(const MKLDNNMemory& memory, bool ftz = true) const; + void SetData(const Memory& memory, bool ftz = true) const; void FillZero(); const VectorDims& getStaticDims() const { @@ -266,8 +266,8 @@ private: DnnlMemMngrHandle mgrHandle; }; -using MKLDNNMemoryPtr = std::shared_ptr; -using MKLDNNMemoryCPtr = std::shared_ptr; +using MemoryPtr = std::shared_ptr; +using MemoryCPtr = std::shared_ptr; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/cpu_shape.cpp b/src/plugins/intel_cpu/src/cpu_shape.cpp index 79318c343d4..8e018240a90 100644 --- a/src/plugins/intel_cpu/src/cpu_shape.cpp +++ b/src/plugins/intel_cpu/src/cpu_shape.cpp @@ -6,7 +6,8 @@ #include "utils/general_utils.h" #include "memory_desc/cpu_memory_desc_utils.h" -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { bool Shape::isCompatible(const VectorDims &vecDims) const { if (getRank() != vecDims.size()) { @@ -47,3 +48,6 @@ std::string Shape::toString() const { output << "}"; return output.str(); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index f67e9f4135b..3d00f214222 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -13,184 +13,184 @@ using Dim = std::size_t; using VectorDims = std::vector; const InferenceEngine::details::caseless_unordered_map type_to_name_tbl = { - { "Constant", Input }, - { "Parameter", Input }, - { "Result", Output }, - { "Convolution", Convolution }, - { "GroupConvolution", Convolution }, - { "MatMul", MatMul }, - { "FullyConnected", FullyConnected }, - { "MaxPool", Pooling }, - { "AvgPool", Pooling }, - { "AdaptiveMaxPool", AdaptivePooling}, - { "AdaptiveAvgPool", AdaptivePooling}, - { "Add", Eltwise }, - { "Subtract", Eltwise }, - { "Multiply", Eltwise }, - { "Divide", Eltwise }, - { "SquaredDifference", Eltwise }, - { "Maximum", Eltwise }, - { "Minimum", Eltwise }, - { "Mod", Eltwise }, - { "FloorMod", Eltwise }, - { "Power", Eltwise }, - { "PowerStatic", Eltwise }, - { "Equal", Eltwise }, - { "NotEqual", Eltwise }, - { "Greater", Eltwise }, - { "GreaterEqual", Eltwise }, - { "Less", Eltwise }, - { "LessEqual", Eltwise }, - { "LogicalAnd", Eltwise }, - { "LogicalOr", Eltwise }, - { "LogicalXor", Eltwise }, - { "LogicalNot", Eltwise }, - { "Relu", Eltwise }, - { "LeakyRelu", Eltwise }, - { "Gelu", Eltwise }, - { "Elu", Eltwise }, - { "Tanh", Eltwise }, - { "Sigmoid", Eltwise }, - { "Abs", Eltwise }, - { "Sqrt", Eltwise }, - { "Clamp", Eltwise }, - { "Exp", Eltwise }, - { "SwishCPU", Eltwise }, - { "HSwish", Eltwise }, - { "Mish", Eltwise }, - { "HSigmoid", Eltwise }, - { "Round", Eltwise }, - { "PRelu", Eltwise }, - { "Erf", Eltwise }, - { "SoftPlus", Eltwise }, - { "Reshape", Reshape }, - { "Squeeze", Reshape }, - { "Unsqueeze", Reshape }, - { "ShapeOf", ShapeOf }, - { "NonZero", NonZero }, - { "Softmax", Softmax }, - { "Reorder", Reorder }, - { "BatchToSpace", BatchToSpace }, - { "SpaceToBatch", SpaceToBatch }, - { "DepthToSpace", DepthToSpace }, - { "SpaceToDepth", SpaceToDepth }, - { "Roll", Roll }, - { "LRN", Lrn }, - { "Split", Split }, - { "VariadicSplit", Split }, - { "Concat", Concatenation }, - { "ConvolutionBackpropData", Deconvolution }, - { "GroupConvolutionBackpropData", Deconvolution }, - { "StridedSlice", StridedSlice }, - { "Slice", StridedSlice }, - { "Tile", Tile }, - { "ROIAlign", ROIAlign }, - { "ROIPooling", ROIPooling }, - { 
"PSROIPooling", PSROIPooling }, - { "DeformablePSROIPooling", PSROIPooling }, - { "Pad", Pad }, - { "Transpose", Transpose }, - { "LSTMCell", RNNCell }, - { "GRUCell", RNNCell }, - { "RNNCell", RNNCell }, - { "LSTMSequence", RNNSeq }, - { "GRUSequence", RNNSeq }, - { "RNNSequence", RNNSeq }, - { "FakeQuantize", FakeQuantize }, - { "BinaryConvolution", BinaryConvolution }, - { "DeformableConvolution", DeformableConvolution }, - { "TensorIterator", TensorIterator }, - { "Loop", TensorIterator }, - { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used - { "Assign", MemoryOutput }, // for construction from layer ctor - { "Convert", Convert }, - { "NV12toRGB", ColorConvert }, - { "NV12toBGR", ColorConvert }, - { "I420toRGB", ColorConvert }, - { "I420toBGR", ColorConvert }, - { "MVN", MVN}, - { "NormalizeL2", NormalizeL2}, - { "ScatterUpdate", ScatterUpdate}, - { "ScatterElementsUpdate", ScatterElementsUpdate}, - { "ScatterNDUpdate", ScatterNDUpdate}, - { "Interpolate", Interpolate}, - { "ReduceL1", Reduce}, - { "ReduceL2", Reduce}, - { "ReduceLogicalAnd", Reduce}, - { "ReduceLogicalOr", Reduce}, - { "ReduceMax", Reduce}, - { "ReduceMean", Reduce}, - { "ReduceMin", Reduce}, - { "ReduceProd", Reduce}, - { "ReduceSum", Reduce}, - { "ReduceLogSum", Reduce}, - { "ReduceLogSumExp", Reduce}, - { "ReduceSumSquare", Reduce}, - { "Broadcast", Broadcast}, - { "EmbeddingSegmentsSum", EmbeddingSegmentsSum}, - { "EmbeddingBagPackedSum", EmbeddingBagPackedSum}, - { "EmbeddingBagOffsetsSum", EmbeddingBagOffsetsSum}, - { "Gather", Gather}, - { "GatherElements", GatherElements}, - { "GatherND", GatherND}, - { "OneHot", OneHot}, - { "RegionYolo", RegionYolo}, - { "Select", Select}, - { "ShuffleChannels", ShuffleChannels}, - { "DFT", DFT}, - { "IDFT", DFT}, - { "Abs", Math}, - { "Acos", Math}, - { "Acosh", Math}, - { "Asin", Math}, - { "Asinh", Math}, - { "Atan", Math}, - { "Atanh", Math}, - { "Ceil", Math}, - { "Ceiling", Math}, - { "Cos", Math}, - { "Cosh", Math}, - { "Floor", Math}, - { "HardSigmoid", Math}, - { "If", If}, - { "Log", Math}, - { "Neg", Math}, - { "Reciprocal", Math}, - { "Selu", Math}, - { "Sign", Math}, - { "Sin", Math}, - { "Sinh", Math}, - { "SoftPlus", Math}, - { "Softsign", Math}, - { "Tan", Math}, - { "CTCLoss", CTCLoss}, - { "Bucketize", Bucketize}, - { "CTCGreedyDecoder", CTCGreedyDecoder}, - { "CTCGreedyDecoderSeqLen", CTCGreedyDecoderSeqLen}, - { "CumSum", CumSum}, - { "DetectionOutput", DetectionOutput}, - { "ExperimentalDetectronDetectionOutput", ExperimentalDetectronDetectionOutput}, - { "LogSoftmax", LogSoftmax}, - { "TopK", TopK}, - { "GatherTree", GatherTree}, - { "GRN", GRN}, - { "Range", Range}, - { "Proposal", Proposal}, - { "ReorgYolo", ReorgYolo}, - { "ReverseSequence", ReverseSequence}, - { "ExperimentalDetectronTopKROIs", ExperimentalDetectronTopKROIs}, - { "ExperimentalDetectronROIFeatureExtractor", ExperimentalDetectronROIFeatureExtractor}, - { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator}, - { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage}, - { "ExtractImagePatches", ExtractImagePatches}, - { "NonMaxSuppression", NonMaxSuppression}, - { "NonMaxSuppressionIEInternal", NonMaxSuppression}, - { "MatrixNms", MatrixNms}, - { "MulticlassNms", MulticlassNms}, - { "Reference", Reference}, - { "Subgraph", Subgraph}, - { "PriorBox", PriorBox}, - { "PriorBoxClustered", PriorBoxClustered}, + { "Constant", Type::Input }, + { "Parameter", Type::Input }, + { 
"Result", Type::Output }, + { "Convolution", Type::Convolution }, + { "GroupConvolution", Type::Convolution }, + { "MatMul", Type::MatMul }, + { "FullyConnected", Type::FullyConnected }, + { "MaxPool", Type::Pooling }, + { "AvgPool", Type::Pooling }, + { "AdaptiveMaxPool", Type::AdaptivePooling}, + { "AdaptiveAvgPool", Type::AdaptivePooling}, + { "Add", Type::Eltwise }, + { "Subtract", Type::Eltwise }, + { "Multiply", Type::Eltwise }, + { "Divide", Type::Eltwise }, + { "SquaredDifference", Type::Eltwise }, + { "Maximum", Type::Eltwise }, + { "Minimum", Type::Eltwise }, + { "Mod", Type::Eltwise }, + { "FloorMod", Type::Eltwise }, + { "Power", Type::Eltwise }, + { "PowerStatic", Type::Eltwise }, + { "Equal", Type::Eltwise }, + { "NotEqual", Type::Eltwise }, + { "Greater", Type::Eltwise }, + { "GreaterEqual", Type::Eltwise }, + { "Less", Type::Eltwise }, + { "LessEqual", Type::Eltwise }, + { "LogicalAnd", Type::Eltwise }, + { "LogicalOr", Type::Eltwise }, + { "LogicalXor", Type::Eltwise }, + { "LogicalNot", Type::Eltwise }, + { "Relu", Type::Eltwise }, + { "LeakyRelu", Type::Eltwise }, + { "Gelu", Type::Eltwise }, + { "Elu", Type::Eltwise }, + { "Tanh", Type::Eltwise }, + { "Sigmoid", Type::Eltwise }, + { "Abs", Type::Eltwise }, + { "Sqrt", Type::Eltwise }, + { "Clamp", Type::Eltwise }, + { "Exp", Type::Eltwise }, + { "SwishCPU", Type::Eltwise }, + { "HSwish", Type::Eltwise }, + { "Mish", Type::Eltwise }, + { "HSigmoid", Type::Eltwise }, + { "Round", Type::Eltwise }, + { "PRelu", Type::Eltwise }, + { "Erf", Type::Eltwise }, + { "SoftPlus", Type::Eltwise }, + { "Reshape", Type::Reshape }, + { "Squeeze", Type::Reshape }, + { "Unsqueeze", Type::Reshape }, + { "ShapeOf", Type::ShapeOf }, + { "NonZero", Type::NonZero }, + { "Softmax", Type::Softmax }, + { "Reorder", Type::Reorder }, + { "BatchToSpace", Type::BatchToSpace }, + { "SpaceToBatch", Type::SpaceToBatch }, + { "DepthToSpace", Type::DepthToSpace }, + { "SpaceToDepth", Type::SpaceToDepth }, + { "Roll", Type::Roll }, + { "LRN", Type::Lrn }, + { "Split", Type::Split }, + { "VariadicSplit", Type::Split }, + { "Concat", Type::Concatenation }, + { "ConvolutionBackpropData", Type::Deconvolution }, + { "GroupConvolutionBackpropData", Type::Deconvolution }, + { "StridedSlice", Type::StridedSlice }, + { "Slice", Type::StridedSlice }, + { "Tile", Type::Tile }, + { "ROIAlign", Type::ROIAlign }, + { "ROIPooling", Type::ROIPooling }, + { "PSROIPooling", Type::PSROIPooling }, + { "DeformablePSROIPooling", Type::PSROIPooling }, + { "Pad", Type::Pad }, + { "Transpose", Type::Transpose }, + { "LSTMCell", Type::RNNCell }, + { "GRUCell", Type::RNNCell }, + { "RNNCell", Type::RNNCell }, + { "LSTMSequence", Type::RNNSeq }, + { "GRUSequence", Type::RNNSeq }, + { "RNNSequence", Type::RNNSeq }, + { "FakeQuantize", Type::FakeQuantize }, + { "BinaryConvolution", Type::BinaryConvolution }, + { "DeformableConvolution", Type::DeformableConvolution }, + { "TensorIterator", Type::TensorIterator }, + { "Loop", Type::TensorIterator }, + { "ReadValue", Type::MemoryInput}, // for construction from name ctor, arbitrary name is used + { "Assign", Type::MemoryOutput }, // for construction from layer ctor + { "Convert", Type::Convert }, + { "NV12toRGB", Type::ColorConvert }, + { "NV12toBGR", Type::ColorConvert }, + { "I420toRGB", Type::ColorConvert }, + { "I420toBGR", Type::ColorConvert }, + { "MVN", Type::MVN}, + { "NormalizeL2", Type::NormalizeL2}, + { "ScatterUpdate", Type::ScatterUpdate}, + { "ScatterElementsUpdate", Type::ScatterElementsUpdate}, + { "ScatterNDUpdate", 
Type::ScatterNDUpdate}, + { "Interpolate", Type::Interpolate}, + { "ReduceL1", Type::Reduce}, + { "ReduceL2", Type::Reduce}, + { "ReduceLogicalAnd", Type::Reduce}, + { "ReduceLogicalOr", Type::Reduce}, + { "ReduceMax", Type::Reduce}, + { "ReduceMean", Type::Reduce}, + { "ReduceMin", Type::Reduce}, + { "ReduceProd", Type::Reduce}, + { "ReduceSum", Type::Reduce}, + { "ReduceLogSum", Type::Reduce}, + { "ReduceLogSumExp", Type::Reduce}, + { "ReduceSumSquare", Type::Reduce}, + { "Broadcast", Type::Broadcast}, + { "EmbeddingSegmentsSum", Type::EmbeddingSegmentsSum}, + { "EmbeddingBagPackedSum", Type::EmbeddingBagPackedSum}, + { "EmbeddingBagOffsetsSum", Type::EmbeddingBagOffsetsSum}, + { "Gather", Type::Gather}, + { "GatherElements", Type::GatherElements}, + { "GatherND", Type::GatherND}, + { "OneHot", Type::OneHot}, + { "RegionYolo", Type::RegionYolo}, + { "Select", Type::Select}, + { "ShuffleChannels", Type::ShuffleChannels}, + { "DFT", Type::DFT}, + { "IDFT", Type::DFT}, + { "Abs", Type::Math}, + { "Acos", Type::Math}, + { "Acosh", Type::Math}, + { "Asin", Type::Math}, + { "Asinh", Type::Math}, + { "Atan", Type::Math}, + { "Atanh", Type::Math}, + { "Ceil", Type::Math}, + { "Ceiling", Type::Math}, + { "Cos", Type::Math}, + { "Cosh", Type::Math}, + { "Floor", Type::Math}, + { "HardSigmoid", Type::Math}, + { "If", Type::If}, + { "Log", Type::Math}, + { "Neg", Type::Math}, + { "Reciprocal", Type::Math}, + { "Selu", Type::Math}, + { "Sign", Type::Math}, + { "Sin", Type::Math}, + { "Sinh", Type::Math}, + { "SoftPlus", Type::Math}, + { "Softsign", Type::Math}, + { "Tan", Type::Math}, + { "CTCLoss", Type::CTCLoss}, + { "Bucketize", Type::Bucketize}, + { "CTCGreedyDecoder", Type::CTCGreedyDecoder}, + { "CTCGreedyDecoderSeqLen", Type::CTCGreedyDecoderSeqLen}, + { "CumSum", Type::CumSum}, + { "DetectionOutput", Type::DetectionOutput}, + { "ExperimentalDetectronDetectionOutput", Type::ExperimentalDetectronDetectionOutput}, + { "LogSoftmax", Type::LogSoftmax}, + { "TopK", Type::TopK}, + { "GatherTree", Type::GatherTree}, + { "GRN", Type::GRN}, + { "Range", Type::Range}, + { "Proposal", Type::Proposal}, + { "ReorgYolo", Type::ReorgYolo}, + { "ReverseSequence", Type::ReverseSequence}, + { "ExperimentalDetectronTopKROIs", Type::ExperimentalDetectronTopKROIs}, + { "ExperimentalDetectronROIFeatureExtractor", Type::ExperimentalDetectronROIFeatureExtractor}, + { "ExperimentalDetectronPriorGridGenerator", Type::ExperimentalDetectronPriorGridGenerator}, + { "ExperimentalDetectronGenerateProposalsSingleImage", Type::ExperimentalDetectronGenerateProposalsSingleImage}, + { "ExtractImagePatches", Type::ExtractImagePatches}, + { "NonMaxSuppression", Type::NonMaxSuppression}, + { "NonMaxSuppressionIEInternal", Type::NonMaxSuppression}, + { "MatrixNms", Type::MatrixNms}, + { "MulticlassNms", Type::MulticlassNms}, + { "Reference", Type::Reference}, + { "Subgraph", Type::Subgraph}, + { "PriorBox", Type::PriorBox}, + { "PriorBoxClustered", Type::PriorBoxClustered}, }; Type TypeFromName(const std::string& type) { @@ -198,183 +198,183 @@ Type TypeFromName(const std::string& type) { if (type_to_name_tbl.end() != itType) { return itType->second; } else { - return Unknown; + return Type::Unknown; } } std::string NameFromType(const Type type) { switch (type) { - case Generic: + case Type::Generic: return "Generic"; - case Reorder: + case Type::Reorder: return "Reorder"; - case Input: + case Type::Input: return "Input"; - case Output: + case Type::Output: return "Output"; - case Convolution: + case Type::Convolution: return 
"Convolution"; - case Deconvolution: + case Type::Deconvolution: return "Deconvolution"; - case Lrn: + case Type::Lrn: return "Lrn"; - case Pooling: + case Type::Pooling: return "Pooling"; - case AdaptivePooling: + case Type::AdaptivePooling: return "AdaptivePooling"; - case FullyConnected: + case Type::FullyConnected: return "FullyConnected"; - case MatMul: + case Type::MatMul: return "MatMul"; - case Softmax: + case Type::Softmax: return "Softmax"; - case Split: + case Type::Split: return "Split"; - case Concatenation: + case Type::Concatenation: return "Concatenation"; - case StridedSlice: + case Type::StridedSlice: return "StridedSlice"; - case Reshape: + case Type::Reshape: return "Reshape"; - case ShapeOf: + case Type::ShapeOf: return "ShapeOf"; - case NonZero: + case Type::NonZero: return "NonZero"; - case Tile: + case Type::Tile: return "Tile"; - case ROIAlign: + case Type::ROIAlign: return "ROIAlign"; - case ROIPooling: + case Type::ROIPooling: return "ROIPooling"; - case PSROIPooling: + case Type::PSROIPooling: return "PSROIPooling"; - case DepthToSpace: + case Type::DepthToSpace: return "DepthToSpace"; - case BatchToSpace: + case Type::BatchToSpace: return "BatchToSpace"; - case Pad: + case Type::Pad: return "Pad"; - case Transpose: + case Type::Transpose: return "Transpose"; - case SpaceToDepth: + case Type::SpaceToDepth: return "SpaceToDepth"; - case SpaceToBatch: + case Type::SpaceToBatch: return "SpaceToBatch"; - case MemoryOutput: + case Type::MemoryOutput: return "MemoryOutput"; - case MemoryInput: + case Type::MemoryInput: return "MemoryInput"; - case RNNSeq: + case Type::RNNSeq: return "RNNSeq"; - case RNNCell: + case Type::RNNCell: return "RNNCell"; - case Eltwise: + case Type::Eltwise: return "Eltwise"; - case FakeQuantize: + case Type::FakeQuantize: return "FakeQuantize"; - case BinaryConvolution: + case Type::BinaryConvolution: return "BinaryConvolution"; - case DeformableConvolution: + case Type::DeformableConvolution: return "DeformableConvolution"; - case MVN: + case Type::MVN: return "MVN"; - case TensorIterator: + case Type::TensorIterator: return "TensorIterator"; - case Convert: + case Type::Convert: return "Convert"; - case ColorConvert: + case Type::ColorConvert: return "ColorConvert"; - case NormalizeL2: + case Type::NormalizeL2: return "NormalizeL2"; - case ScatterUpdate: + case Type::ScatterUpdate: return "ScatterUpdate"; - case ScatterElementsUpdate: + case Type::ScatterElementsUpdate: return "ScatterElementsUpdate"; - case ScatterNDUpdate: + case Type::ScatterNDUpdate: return "ScatterNDUpdate"; - case Interpolate: + case Type::Interpolate: return "Interpolate"; - case Reduce: + case Type::Reduce: return "Reduce"; - case Broadcast: + case Type::Broadcast: return "Broadcast"; - case EmbeddingSegmentsSum: + case Type::EmbeddingSegmentsSum: return "EmbeddingSegmentsSum"; - case EmbeddingBagPackedSum: + case Type::EmbeddingBagPackedSum: return "EmbeddingBagPackedSum"; - case EmbeddingBagOffsetsSum: + case Type::EmbeddingBagOffsetsSum: return "EmbeddingBagOffsetsSum"; - case Gather: + case Type::Gather: return "Gather"; - case GatherElements: + case Type::GatherElements: return "GatherElements"; - case GatherND: + case Type::GatherND: return "GatherND"; - case OneHot: + case Type::OneHot: return "OneHot"; - case RegionYolo: + case Type::RegionYolo: return "RegionYolo"; - case Select: + case Type::Select: return "Select"; - case Roll: + case Type::Roll: return "Roll"; - case ShuffleChannels: + case Type::ShuffleChannels: return "ShuffleChannels"; - case DFT: + 
case Type::DFT: return "DFT"; - case Math: + case Type::Math: return "Math"; - case CTCLoss: + case Type::CTCLoss: return "CTCLoss"; - case Bucketize: + case Type::Bucketize: return "Bucketize"; - case CTCGreedyDecoder: + case Type::CTCGreedyDecoder: return "CTCGreedyDecoder"; - case CTCGreedyDecoderSeqLen: + case Type::CTCGreedyDecoderSeqLen: return "CTCGreedyDecoderSeqLen"; - case CumSum: + case Type::CumSum: return "CumSum"; - case DetectionOutput: + case Type::DetectionOutput: return "DetectionOutput"; - case ExperimentalDetectronDetectionOutput: + case Type::ExperimentalDetectronDetectionOutput: return "ExperimentalDetectronDetectionOutput"; - case If: + case Type::If: return "If"; - case LogSoftmax: + case Type::LogSoftmax: return "LogSoftmax"; - case TopK: + case Type::TopK: return "TopK"; - case GatherTree: + case Type::GatherTree: return "GatherTree"; - case GRN: + case Type::GRN: return "GRN"; - case Range: + case Type::Range: return "Range"; - case Proposal: + case Type::Proposal: return "Proposal"; - case ReorgYolo: + case Type::ReorgYolo: return "ReorgYolo"; - case ReverseSequence: + case Type::ReverseSequence: return "ReverseSequence"; - case ExperimentalDetectronTopKROIs: + case Type::ExperimentalDetectronTopKROIs: return "ExperimentalDetectronTopKROIs"; - case ExperimentalDetectronROIFeatureExtractor: + case Type::ExperimentalDetectronROIFeatureExtractor: return "ExperimentalDetectronROIFeatureExtractor"; - case ExperimentalDetectronPriorGridGenerator: + case Type::ExperimentalDetectronPriorGridGenerator: return "ExperimentalDetectronPriorGridGenerator"; - case ExperimentalDetectronGenerateProposalsSingleImage: + case Type::ExperimentalDetectronGenerateProposalsSingleImage: return "ExperimentalDetectronGenerateProposalsSingleImage"; - case ExtractImagePatches: + case Type::ExtractImagePatches: return "ExtractImagePatches"; - case NonMaxSuppression: + case Type::NonMaxSuppression: return "NonMaxSuppression"; - case MatrixNms: + case Type::MatrixNms: return "MatrixNms"; - case MulticlassNms: + case Type::MulticlassNms: return "MulticlassNms"; - case Reference: + case Type::Reference: return "Reference"; - case Subgraph: + case Type::Subgraph: return "Subgraph"; default: return "Unknown"; @@ -382,8 +382,8 @@ std::string NameFromType(const Type type) { } std::string algToString(const Algorithm alg) { -#define CASE(_alg) do { \ - if (alg == _alg) return #_alg; \ +#define CASE(_alg) do { \ + if (alg == Algorithm::_alg) return #_alg; \ } while (0) CASE(Default); CASE(PoolingMax); diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 38e97fb1733..bb37e21bc42 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -15,7 +15,7 @@ namespace intel_cpu { using Dim = std::size_t; using VectorDims = std::vector; -enum Type { +enum class Type { Unknown, Generic, If, @@ -107,7 +107,7 @@ enum Type { PriorBoxClustered, }; -enum Algorithm { +enum class Algorithm { Default, // Pooling algorithms diff --git a/src/plugins/intel_cpu/src/descriptor.cpp b/src/plugins/intel_cpu/src/dnnl_descriptor.cpp similarity index 65% rename from src/plugins/intel_cpu/src/descriptor.cpp rename to src/plugins/intel_cpu/src/dnnl_descriptor.cpp index 154c528aac6..4e71410a480 100644 --- a/src/plugins/intel_cpu/src/descriptor.cpp +++ b/src/plugins/intel_cpu/src/dnnl_descriptor.cpp @@ -4,30 +4,33 @@ #include -#include "descriptor.h" +#include "dnnl_descriptor.h" -mkldnn::primitive_desc_iterator 
MKLDNNDescriptor::createPrimitiveDescriptorIterator(const mkldnn::engine &engine, +namespace ov { +namespace intel_cpu { + +mkldnn::primitive_desc_iterator DnnlDesriptor::createPrimitiveDescriptorIterator(const mkldnn::engine &engine, const mkldnn::primitive_attr &attr) const { return desc->createPrimitiveDescriptorIterator(attr, engine); } -MKLDNNDescriptor::operator bool() { +DnnlDesriptor::operator bool() { return desc != nullptr; } -size_t MKLDNNDescriptor::inputNumbers() const { +size_t DnnlDesriptor::inputNumbers() const { return 1; } -size_t MKLDNNDescriptor::outputNumbers() const { +size_t DnnlDesriptor::outputNumbers() const { return 1; } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -35,11 +38,11 @@ MKLDNNDescriptor::operator std::shared_ptr() return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -47,14 +50,14 @@ MKLDNNDescriptor::operator std::shared_ptr( return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc, +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc, std::shared_ptr prim) { this->desc.reset( new DescBwdImpl(desc, prim)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -62,7 +65,7 @@ MKLDNNDescriptor::operator std::shared_ptrgetPtr(); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -70,11 +73,11 @@ MKLDNNDescriptor::operator std::shared_ptrgetPrimPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -82,11 +85,11 @@ MKLDNNDescriptor::operator std::shared_ptr( return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -94,11 +97,11 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = 
std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -106,11 +109,11 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -118,11 +121,11 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -130,11 +133,11 @@ MKLDNNDescriptor::operator std::shared_ptr() return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -142,11 +145,11 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -154,11 +157,11 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -166,11 +169,11 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; @@ -178,14 +181,17 @@ MKLDNNDescriptor::operator std::shared_ptr() { return typeDesc->getPtr(); } -MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { +DnnlDesriptor::DnnlDesriptor(std::shared_ptr desc) { this->desc.reset(new DescFwdImpl(desc)); } -MKLDNNDescriptor::operator std::shared_ptr() { +DnnlDesriptor::operator std::shared_ptr() { auto typeDesc = std::dynamic_pointer_cast>(desc); if (typeDesc == nullptr) { IE_THROW() << "Cannot cast descriptor!"; } return typeDesc->getPtr(); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/descriptor.h b/src/plugins/intel_cpu/src/dnnl_descriptor.h similarity index 
72% rename from src/plugins/intel_cpu/src/descriptor.h rename to src/plugins/intel_cpu/src/dnnl_descriptor.h index 6589833efed..b2800de1fbf 100644 --- a/src/plugins/intel_cpu/src/descriptor.h +++ b/src/plugins/intel_cpu/src/dnnl_descriptor.h @@ -8,48 +8,51 @@ #include #include "mkldnn/ie_mkldnn.h" -class MKLDNNDescriptor { +namespace ov { +namespace intel_cpu { + +class DnnlDesriptor { public: - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - MKLDNNDescriptor(std::shared_ptr desc, - std::shared_ptr prim); + DnnlDesriptor(std::shared_ptr desc, + std::shared_ptr prim); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); operator std::shared_ptr(); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); - explicit MKLDNNDescriptor(std::shared_ptr desc); + explicit DnnlDesriptor(std::shared_ptr desc); operator std::shared_ptr(); mkldnn::primitive_desc_iterator createPrimitiveDescriptorIterator(const mkldnn::engine &engine, @@ -110,3 +113,6 @@ private: std::shared_ptr desc; }; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp similarity index 79% rename from src/plugins/intel_cpu/src/extension_utils.cpp rename to src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index a215d45166a..c863c63bbd5 100644 --- a/src/plugins/intel_cpu/src/extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -2,15 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include "utils/general_utils.h" #include #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; -using namespace ov::intel_cpu; -uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) { +namespace ov { +namespace intel_cpu { + +uint8_t DnnlExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) { switch (dataType) { case mkldnn::memory::data_type::f32: return 4; @@ -31,7 +33,7 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType) } } -memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& prec) { +memory::data_type DnnlExtensionUtils::IEPrecisionToDataType(const InferenceEngine::Precision& 
prec) { switch (prec) { case InferenceEngine::Precision::FP32: return memory::data_type::f32; @@ -54,7 +56,7 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(const InferenceEng } } -InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) { +InferenceEngine::Precision DnnlExtensionUtils::DataTypeToIEPrecision(memory::data_type dataType) { switch (dataType) { case memory::data_type::f32: return InferenceEngine::Precision::FP32; @@ -76,14 +78,14 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d } } -Dim MKLDNNExtensionUtils::convertToDim(const dnnl::memory::dim &dim) { +Dim DnnlExtensionUtils::convertToDim(const dnnl::memory::dim &dim) { return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast(dim); } -dnnl::memory::dim MKLDNNExtensionUtils::convertToDnnlDim(const Dim &dim) { +dnnl::memory::dim DnnlExtensionUtils::convertToDnnlDim(const Dim &dim) { return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast(dim); } -VectorDims MKLDNNExtensionUtils::convertToVectorDims(const memory::dims& dims) { +VectorDims DnnlExtensionUtils::convertToVectorDims(const memory::dims& dims) { std::vector vecResult; vecResult.reserve(dims.size()); std::back_insert_iterator> itr(vecResult); @@ -91,7 +93,7 @@ VectorDims MKLDNNExtensionUtils::convertToVectorDims(const memory::dims& dims) { return vecResult; } -memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const VectorDims& dims) { +memory::dims DnnlExtensionUtils::convertToDnnlDims(const VectorDims& dims) { memory::dims vecResult; vecResult.reserve(dims.size()); std::back_insert_iterator itr(vecResult); @@ -99,7 +101,7 @@ memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const VectorDims& dims) { return vecResult; } -memory::format_tag MKLDNNExtensionUtils::GetPlainFormatByRank(size_t rank) { +memory::format_tag DnnlExtensionUtils::GetPlainFormatByRank(size_t rank) { switch (rank) { case 0: case 1: @@ -119,7 +121,7 @@ memory::format_tag MKLDNNExtensionUtils::GetPlainFormatByRank(size_t rank) { } } -DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc) { +DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc) { if (desc.data.format_kind == dnnl_blocked) { return std::shared_ptr(new DnnlBlockedMemoryDesc(desc)); } else { @@ -127,7 +129,7 @@ DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::des } } -size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc) { +size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& desc) { auto tmpDesc = desc; const auto offset0 = tmpDesc.data.offset0; tmpDesc.data.offset0 = 0; @@ -138,10 +140,13 @@ size_t MKLDNNExtensionUtils::getMemSizeForDnnlDesc(const mkldnn::memory::desc& d return size; } -std::shared_ptr MKLDNNExtensionUtils::makeUndefinedDesc(const memory::desc &desc, const Shape &shape) { +std::shared_ptr DnnlExtensionUtils::makeUndefinedDesc(const memory::desc &desc, const Shape &shape) { if (desc.data.format_kind == dnnl_blocked) { return std::shared_ptr(new DnnlBlockedMemoryDesc(desc, shape)); } else { IE_THROW(Unexpected) << "Cannot make undefined descriptor. 
Only dnnl_blocked type is allowed."; } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/extension_utils.h b/src/plugins/intel_cpu/src/dnnl_extension_utils.h similarity index 97% rename from src/plugins/intel_cpu/src/extension_utils.h rename to src/plugins/intel_cpu/src/dnnl_extension_utils.h index 45ca96550c3..e52156c7747 100644 --- a/src/plugins/intel_cpu/src/extension_utils.h +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.h @@ -4,7 +4,7 @@ /** * @brief Convinience wrapper class for handling MKL-DNN memory formats. - * @file extension_utils.h + * @file dnnl_extension_utils.h */ #pragma once @@ -18,7 +18,7 @@ namespace intel_cpu { class DnnlMemoryDesc; -class MKLDNNExtensionUtils { +class DnnlExtensionUtils { public: static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec); diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index a95e983a9a6..869f0a8e390 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -4,7 +4,7 @@ #include "edge.h" #include "node.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include #include "nodes/input.h" @@ -12,28 +12,28 @@ using namespace mkldnn; namespace ov { namespace intel_cpu { -MKLDNNEdge::MKLDNNEdge(const MKLDNNNodePtr &parent, const MKLDNNNodePtr &child, int pr_port, int ch_port) : +Edge::Edge(const NodePtr &parent, const NodePtr &child, int pr_port, int ch_port) : parent(parent), child(child), parent_port(pr_port), child_port(ch_port) {} -const MKLDNNNodePtr MKLDNNEdge::getParent() const { +const NodePtr Edge::getParent() const { auto parentPtr = parent.lock(); if (!parentPtr) IE_THROW() << "Edge contains empty parent node"; return parentPtr; } -const MKLDNNNodePtr MKLDNNEdge::getChild() const { +const NodePtr Edge::getChild() const { auto childPtr = child.lock(); if (!childPtr) IE_THROW() << "Edge contains empty child node"; return childPtr; } -bool MKLDNNEdge::isUseExternalMemory() const { +bool Edge::isUseExternalMemory() const { return useExternalMemory; } -bool MKLDNNEdge::isDropped() const { +bool Edge::isDropped() const { bool not_in_parent = true; bool not_in_child = true; @@ -53,10 +53,10 @@ bool MKLDNNEdge::isDropped() const { return not_in_parent && not_in_child; } -void MKLDNNEdge::drop() { - auto _drop_from = [&] (std::vector &list) { +void Edge::drop() { + auto _drop_from = [&] (std::vector &list) { auto myself = std::find_if(list.begin(), list.end(), - [&] (MKLDNNEdgeWeakPtr edge) { return edge.lock().get() == this; }); + [&] (EdgeWeakPtr edge) { return edge.lock().get() == this; }); if (myself != list.end()) list.erase(myself); @@ -66,7 +66,7 @@ void MKLDNNEdge::drop() { _drop_from(getChild()->parentEdges); } -bool MKLDNNEdge::enforceReorder() { +bool Edge::enforceReorder() { bool canBeInPlaceConflicts = false; auto parentNode = getParent(); auto parentSPD = parentNode->getSelectedPrimitiveDescriptor(); @@ -83,7 +83,7 @@ bool MKLDNNEdge::enforceReorder() { childCanChangeMem = true; } - const auto& detectInPlaceChildrenNum = [](const std::vector& edges) -> size_t { + const auto& detectInPlaceChildrenNum = [](const std::vector& edges) -> size_t { size_t count = 0; for (const auto& edge : edges) { auto childSPD = edge->getChild()->getSelectedPrimitiveDescriptor(); @@ -105,7 +105,7 @@ bool MKLDNNEdge::enforceReorder() { for (auto &p_edge_peer : portChildEdges) { if (p_edge_peer.get() == this) continue; 
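The qualifier added in the hunk that follows (plain Reorder becoming Type::Reorder) is a direct consequence of the enum class Type change in cpu_types.h earlier in this patch. A minimal standalone sketch, using made-up enumerators rather than the plugin's full list, of why the scope qualifier becomes mandatory once the enum is scoped:

    #include <string>

    // Scoped enums do not inject their enumerators into the enclosing
    // namespace, so every switch label and comparison must name the scope.
    enum class Type { Unknown, Input, Output, Reorder, Eltwise };

    std::string NameFromTypeDemo(Type type) {
        switch (type) {
        case Type::Reorder: return "Reorder";   // unqualified "Reorder" would not compile
        case Type::Eltwise: return "Eltwise";
        default:            return "Unknown";
        }
    }
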
- if (p_edge_peer->getChild()->getType() != Reorder && p_edge_peer->inPlace(LOOK_DOWN)) + if (p_edge_peer->getChild()->getType() != Type::Reorder && p_edge_peer->inPlace(LOOK_DOWN)) canBeInPlaceConflicts = true; } } @@ -126,7 +126,7 @@ bool MKLDNNEdge::enforceReorder() { if ((childSPD->getImplementationType() & impl_desc_type::sse42) && Type::Input == parentNode->getType() && parentNode->isConstant()) { - if (auto pInputNode = std::dynamic_pointer_cast(parentNode)) { + if (auto pInputNode = std::dynamic_pointer_cast(parentNode)) { auto rawMemPtr = pInputNode->getMemoryPtr()->GetData(); bool isAligned = (reinterpret_cast(rawMemPtr) & 15) == 0; if (!isAligned) { @@ -217,7 +217,7 @@ static inline bool isPhycicalMemCompatible(const MemoryDesc& lhsMemDesc, const M return true; } -MKLDNNEdge::ReorderStatus MKLDNNEdge::needReorder() { +Edge::ReorderStatus Edge::needReorder() { bool optimized = false; auto inputPortDesc = getInputPortDesc(); auto outPortDesc = getOutputPortDesc(); @@ -243,22 +243,22 @@ MKLDNNEdge::ReorderStatus MKLDNNEdge::needReorder() { return ReorderStatus::No; } -void MKLDNNEdge::reuse(MKLDNNMemoryPtr ptr) { +void Edge::reuse(MemoryPtr ptr) { if (status != Status::NeedAllocation) return; memoryPtr = ptr; status = Status::Allocated; } -int MKLDNNEdge::getInputNum() const { +int Edge::getInputNum() const { return parent_port; } -int MKLDNNEdge::getOutputNum() const { +int Edge::getOutputNum() const { return child_port; } -void MKLDNNEdge::allocate(const void* mem_ptr) { +void Edge::allocate(const void* mem_ptr) { if (status != Status::NeedAllocation) return; @@ -271,13 +271,13 @@ void MKLDNNEdge::allocate(const void* mem_ptr) { IE_THROW() << "Cannot allocate memory for incompatible descriptors."; auto parentPtr = getParent(); - memoryPtr.reset(new MKLDNNMemory(parentPtr->getEngine())); + memoryPtr.reset(new Memory(parentPtr->getEngine())); memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing status = Status::Allocated; } -std::string MKLDNNEdge::name() const { +std::string Edge::name() const { auto parentPtr = getParent(); auto childPtr = getChild(); @@ -288,10 +288,8 @@ std::string MKLDNNEdge::name() const { return result.str(); } - - -void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { - auto isInPlace = [](const MKLDNNNodePtr node, int port) -> bool { +void Edge::externalAllocate(WeightsSharing::Ptr weightsCache) { + auto isInPlace = [](const NodePtr node, int port) -> bool { const auto& selected_pd = node->getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; @@ -333,7 +331,7 @@ void MKLDNNEdge::externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache) { } } -void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) { +void Edge::changeStatus(Edge::Status state) { if (state == Status::NotAllocated) { IE_THROW() << "Incorrect behaviour! 
Use method sharedMemFrom()"; } @@ -347,7 +345,7 @@ void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) { status = state; } -PortDescBaseCPtr MKLDNNEdge::getInputPortDesc() const { +PortDescBaseCPtr Edge::getInputPortDesc() const { auto parentPtr = getParent(); if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Primitive descriptor for node " << parentPtr->getName() << " is not selected."; @@ -371,7 +369,7 @@ PortDescBaseCPtr MKLDNNEdge::getInputPortDesc() const { return inputPortDesc; } -PortDescBaseCPtr MKLDNNEdge::getOutputPortDesc() const { +PortDescBaseCPtr Edge::getOutputPortDesc() const { auto childPtr = getChild(); if (childPtr->getSelectedPrimitiveDescriptor() == nullptr) @@ -396,7 +394,7 @@ PortDescBaseCPtr MKLDNNEdge::getOutputPortDesc() const { return outPortDesc; } -const MemoryDesc& MKLDNNEdge::getInputDesc() const { +const MemoryDesc& Edge::getInputDesc() const { auto memDescPtr = getInputPortDesc()->getMemDesc(); if (!memDescPtr) { IE_THROW() << "Cannot get input memory descriptor for edge: " << getParent()->getName() << "->" @@ -405,7 +403,7 @@ const MemoryDesc& MKLDNNEdge::getInputDesc() const { return *memDescPtr; } -const MemoryDesc& MKLDNNEdge::getOutputDesc() const { +const MemoryDesc& Edge::getOutputDesc() const { auto memDescPtr = getOutputPortDesc()->getMemDesc(); if (!memDescPtr) { IE_THROW() << "Cannot get output memory descriptor for edge: " << getParent()->getName() << "->" @@ -414,7 +412,7 @@ const MemoryDesc& MKLDNNEdge::getOutputDesc() const { return *memDescPtr; } -const MemoryDesc& MKLDNNEdge::getDesc() const { +const MemoryDesc& Edge::getDesc() const { if (!getInputDesc().isCompatible(getOutputDesc())) IE_THROW() << "Cannot get descriptor for edge: " << getParent()->getName() << "->" << getChild()->getName(); @@ -422,13 +420,13 @@ const MemoryDesc& MKLDNNEdge::getDesc() const { return getInputDesc(); } -const MKLDNNMemory &MKLDNNEdge::getMemory() { +const Memory &Edge::getMemory() { return *getMemoryPtr(); } -MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { +MemoryPtr &Edge::getMemoryPtr() { if (status == Status::NotAllocated) { - memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); + memoryPtr.reset(new Memory(getParent()->getEngine())); const auto &desc = getDesc(); auto sharedEdge = getSharedEdge(); auto sharedEdgeParent = sharedEdge->getParent(); @@ -444,12 +442,12 @@ MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { return memoryPtr; } -void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) { +void Edge::sharedMemFrom(const EdgePtr &edge) { memoryFromEdge = edge; status = Status::NotAllocated; } -void MKLDNNEdge::validate() { +void Edge::validate() { if (status == Status::Validated) return; getMemory(); @@ -462,7 +460,7 @@ void MKLDNNEdge::validate() { status = Status::Validated; } -MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const { +EdgePtr Edge::getSharedEdge() const { auto memoryFromEdgePtr = memoryFromEdge.lock(); if (!memoryFromEdgePtr) { IE_THROW() << "Cannot get memory ptr for edge( " << name() << " ). 
The pointer on the edge with memory is empty!"; @@ -470,14 +468,14 @@ MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const { return memoryFromEdgePtr; } -MKLDNNEdgePtr MKLDNNEdge::getSharedEdge(std::nothrow_t) const { +EdgePtr Edge::getSharedEdge(std::nothrow_t) const { return memoryFromEdge.lock(); } -void MKLDNNEdge::init() { +void Edge::init() { if (status != Status::NeedAllocation && status != Status::Uninitialized) return; - MKLDNNEdgePtr edgePtr = getBaseEdge(); + EdgePtr edgePtr = getBaseEdge(); if (edgePtr.get() == this) { changeStatus(Status::NeedAllocation); } else { @@ -511,7 +509,7 @@ void MKLDNNEdge::init() { * @param type some magic enum values... description needed * @return root of view-on-memory subgraph */ -MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(int look) { +EdgePtr Edge::getBaseEdge(int look) { auto parentConfig = getParent()->getSelectedPrimitiveDescriptor()->getConfig(); auto childConfig = getChild()->getSelectedPrimitiveDescriptor()->getConfig(); int inputNum = getInputNum(); @@ -562,7 +560,7 @@ MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(int look) { return edges_for_same_port[0]; } -bool MKLDNNEdge::inPlace(LOOK look) { +bool Edge::inPlace(LOOK look) { auto parentSPD = getParent()->getSelectedPrimitiveDescriptor(); auto childSPD = getChild()->getSelectedPrimitiveDescriptor(); if (!parentSPD || !childSPD) diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 9759a945b92..8b5001256cf 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -17,17 +17,17 @@ namespace ov { namespace intel_cpu { -class MKLDNNNode; -class MKLDNNEdge; +class Node; +class Edge; -using MKLDNNEdgePtr = std::shared_ptr; -using MKLDNNEdgeWeakPtr = std::weak_ptr; +using EdgePtr = std::shared_ptr; +using EdgeWeakPtr = std::weak_ptr; -class MKLDNNEdge { +class Edge { public: - MKLDNNEdge(const std::shared_ptr& parent, - const std::shared_ptr& child, - int pr_port = 0, int ch_port = 0); + Edge(const std::shared_ptr& parent, + const std::shared_ptr& child, + int pr_port = 0, int ch_port = 0); enum class Status { Uninitialized, @@ -51,16 +51,16 @@ public: void init(); void allocate(const void* mem_ptr = nullptr); - void externalAllocate(MKLDNNWeightsSharing::Ptr weightsCache); - void reuse(MKLDNNMemoryPtr ptr); + void externalAllocate(WeightsSharing::Ptr weightsCache); + void reuse(MemoryPtr ptr); void validate(); void drop(); - const std::shared_ptr getParent() const; - const std::shared_ptr getChild() const; + const std::shared_ptr getParent() const; + const std::shared_ptr getChild() const; - const MKLDNNMemory& getMemory(); - MKLDNNMemoryPtr& getMemoryPtr(); + const Memory& getMemory(); + MemoryPtr& getMemoryPtr(); ReorderStatus needReorder(); bool isDropped() const; @@ -71,9 +71,9 @@ public: void setChildPort(const size_t port) { child_port = port; } - void sharedMemFrom(const MKLDNNEdgePtr& edge); - MKLDNNEdgePtr getSharedEdge() const; - MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; + void sharedMemFrom(const EdgePtr& edge); + EdgePtr getSharedEdge() const; + EdgePtr getSharedEdge(std::nothrow_t) const; bool hasDefinedMaxSize() const { return getDesc().hasDefinedMaxSize(); @@ -82,14 +82,14 @@ public: private: std::string name() const; - std::weak_ptr parent; - std::weak_ptr child; + std::weak_ptr parent; + std::weak_ptr child; int parent_port; int child_port; bool useExternalMemory = false; - MKLDNNEdgeWeakPtr memoryFromEdge; - MKLDNNMemoryPtr memoryPtr; + EdgeWeakPtr memoryFromEdge; + MemoryPtr memoryPtr; Status status = 
Status::Uninitialized; const MemoryDesc& getInputDesc() const; @@ -102,9 +102,9 @@ private: enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 }; - MKLDNNEdgePtr getBaseEdge(int look = LOOK_BOTH); + EdgePtr getBaseEdge(int look = LOOK_BOTH); bool inPlace(LOOK look = LOOK_BOTH); - friend class MKLDNNGraph; + friend class Graph; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp index 84a506178d2..40a28fbc3e7 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp @@ -49,7 +49,7 @@ struct jit_snippets_compile_args { /// \param in[0] The number of the node inputs /// \param in[1] The number of the node outputs /// -// Todo: Scheduler dims and offsets are currently calculated in MKLDNN Subgraph node and passed to the KernelEmitter. +// Todo: Scheduler dims and offsets are currently calculated in Subgraph node and passed to the KernelEmitter. // However, it seems more natural to calculate all the offsets right in the Kernel op, because the calculation is // not device-specific. It is based only on input/output dims (which we already know) and harness num dims // (which we should pass from the plugin). It seems also better to wrap the enclosed emitters in tiles in the Kernel op diff --git a/src/plugins/intel_cpu/src/exec_network.cpp b/src/plugins/intel_cpu/src/exec_network.cpp index 5116c57989a..e73922859dc 100644 --- a/src/plugins/intel_cpu/src/exec_network.cpp +++ b/src/plugins/intel_cpu/src/exec_network.cpp @@ -33,25 +33,27 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace InferenceEngine::details; +namespace ov { +namespace intel_cpu { + InferenceEngine::IInferRequestInternal::Ptr -MKLDNNExecNetwork::CreateInferRequestImpl(const std::vector>& inputs, - const std::vector>& outputs) { +ExecNetwork::CreateInferRequestImpl(const std::vector>& inputs, + const std::vector>& outputs) { if (!this->_plugin) return nullptr; const auto& core = _plugin->GetCore(); if (!core || !core->isNewAPI()) return nullptr; - return std::make_shared(inputs, outputs, std::static_pointer_cast(shared_from_this())); + return std::make_shared(inputs, outputs, std::static_pointer_cast(shared_from_this())); } InferenceEngine::IInferRequestInternal::Ptr -MKLDNNExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs) { - return std::make_shared(networkInputs, networkOutputs, std::static_pointer_cast(shared_from_this())); +ExecNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, + InferenceEngine::OutputsDataMap networkOutputs) { + return std::make_shared(networkInputs, networkOutputs, std::static_pointer_cast(shared_from_this())); } struct ImmediateSerialExecutor : public ITaskExecutor { @@ -62,11 +64,11 @@ struct ImmediateSerialExecutor : public ITaskExecutor { std::mutex _mutex; }; -MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, - const Config &cfg, - const MKLDNNExtensionManager::Ptr& extMgr, - NumaNodesWeights &numaNodesWeights, - const std::shared_ptr& plugin) : +ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network, + const Config &cfg, + const ExtensionManager::Ptr& extMgr, + NumaNodesWeights &numaNodesWeights, + const std::shared_ptr& plugin) : 
InferenceEngine::ExecutableNetworkThreadSafeDefault{nullptr, nullptr}, extensionManager(extMgr), _cfg{cfg}, @@ -92,7 +94,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, } else if (_cfg.batchLimit > 1) { // check topology for applicability if (!CanProcessDynBatch(_network)) { - IE_THROW() << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!"; + IE_THROW() << "Graph::CreateGraph: such topology cannot be compiled for dynamic batch!"; } } @@ -126,12 +128,12 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, if (_cfg.streamExecutorConfig._streams != 0) { for (auto&& task : tasks) { task = [this] { - MKLDNNExecNetwork::GetGraph(); + ExecNetwork::GetGraph(); }; } _taskExecutor->runAndWait(tasks); } else { - MKLDNNExecNetwork::GetGraph(); + ExecNetwork::GetGraph(); } // Save all MemoryLayer data tensors. Will use insight about mechanics @@ -139,10 +141,10 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, // producer as storage for tensor to keep it between infer calls. if (_graphs.size() == 1) { for (auto &node : GetGraph()._graph.GetNodes()) { - if (node->getType() == MemoryInput) { - auto memoryNode = dynamic_cast(node.get()); + if (node->getType() == Type::MemoryInput) { + auto memoryNode = dynamic_cast(node.get()); if (!memoryNode) { - IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode"; + IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; } auto state_store = memoryNode->getStore(); auto state_name = memoryNode->getId(); @@ -152,13 +154,13 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, if (suffix_idx != std::string::npos) state_name = state_name.substr(0, suffix_idx); - memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store)); + memoryStates.emplace_back(new VariableState(state_name, state_store)); } } } } -MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const { +ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const { int streamId = 0; int numaNodeId = 0; auto streamsExecutor = dynamic_cast(_taskExecutor.get()); @@ -166,7 +168,7 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const { streamId = streamsExecutor->GetStreamId(); numaNodeId = streamsExecutor->GetNumaNodeId(); } - auto graphLock = Graph::Lock(_graphs[streamId % _graphs.size()]); + auto graphLock = GraphGuard::Lock(_graphs[streamId % _graphs.size()]); if (!graphLock._graph.IsReady()) { std::exception_ptr exception; auto makeGraph = [&] { @@ -192,31 +194,31 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() const { return graphLock; } -void MKLDNNExecNetwork::setProperty(const std::map &properties) { +void ExecNetwork::setProperty(const std::map &properties) { { std::lock_guard lock{_cfgMutex}; _cfg.readProperties(properties); } for (auto& g : _graphs) { - auto graphLock = Graph::Lock(g); + auto graphLock = GraphGuard::Lock(g); if (graphLock._graph.IsReady()) { graphLock._graph.setProperty(properties); } } } -InferenceEngine::IInferRequestInternal::Ptr MKLDNNExecNetwork::CreateInferRequest() { - return CreateAsyncInferRequestFromSync(); +InferenceEngine::IInferRequestInternal::Ptr ExecNetwork::CreateInferRequest() { + return CreateAsyncInferRequestFromSync(); } -std::shared_ptr MKLDNNExecNetwork::GetExecGraphInfo() { +std::shared_ptr ExecNetwork::GetExecGraphInfo() { if (_graphs.empty()) IE_THROW() << "No graph was found"; return 
GetGraph()._graph.dump(); } -bool MKLDNNExecNetwork::isLegacyAPI() const { +bool ExecNetwork::isLegacyAPI() const { const auto& core = _plugin->GetCore(); if (!core) IE_THROW() << "Unable to get API version. Core is unavailable"; @@ -224,7 +226,7 @@ bool MKLDNNExecNetwork::isLegacyAPI() const { return !core->isNewAPI(); } -Parameter MKLDNNExecNetwork::GetConfigLegacy(const std::string &name) const { +Parameter ExecNetwork::GetConfigLegacy(const std::string &name) const { if (_graphs.empty()) IE_THROW() << "No graph was found"; /* legacy implementation return all the parameters which is actually not correct @@ -244,13 +246,13 @@ Parameter MKLDNNExecNetwork::GetConfigLegacy(const std::string &name) const { * All the RO properties are covered with GetMetric() method and * GetConfig() is not expected to be called by new API with params from new configuration API. */ -Parameter MKLDNNExecNetwork::GetConfig(const std::string &name) const { +Parameter ExecNetwork::GetConfig(const std::string &name) const { /* Internally legacy parameters are used with new API as part of migration procedure. * This fallback can be removed as soon as migration completed */ return GetConfigLegacy(name); } -InferenceEngine::Parameter MKLDNNExecNetwork::GetMetricLegacy(const std::string &name, const Graph& graph) const { +InferenceEngine::Parameter ExecNetwork::GetMetricLegacy(const std::string &name, const GraphGuard& graph) const { if (name == METRIC_KEY(NETWORK_NAME)) { IE_SET_METRIC_RETURN(NETWORK_NAME, graph.dump()->get_friendly_name()); } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { @@ -278,7 +280,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetricLegacy(const std::string } } -InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) const { +InferenceEngine::Parameter ExecNetwork::GetMetric(const std::string &name) const { if (_graphs.empty()) IE_THROW() << "No graph was found"; // @todo Can't we just use local copy (_cfg) instead? 
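ExecNetwork::GetGraph() in the hunks above hands out a per-stream graph wrapped in a GraphGuard::Lock. As a rough standalone sketch of that idiom, with simplified stand-in types rather than the plugin's real classes: the guard pairs a graph with its mutex, and the lock type carries a reference to the graph it protects, so a caller holds the mutex for exactly as long as it uses the graph.

    #include <mutex>

    struct DemoGraph {
        bool ready = false;
        bool IsReady() const { return ready; }
    };

    // Guard couples the graph with a mutex; Lock is a unique_lock that also
    // exposes the guarded graph.
    struct GraphGuard : public DemoGraph {
        std::mutex _mutex;
        struct Lock : public std::unique_lock<std::mutex> {
            explicit Lock(GraphGuard& g) : std::unique_lock<std::mutex>(g._mutex), _graph(g) {}
            GraphGuard& _graph;
        };
    };

    void useGraph(GraphGuard& g) {
        GraphGuard::Lock lock(g);        // acquires g._mutex for this scope
        if (!lock._graph.IsReady()) {
            lock._graph.ready = true;    // build the graph while still locked
        }
    }
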
@@ -354,7 +356,7 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) return GetMetricLegacy(name, graph); } -bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr function, int64_t& maxBatchSize) const { +bool ExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr function, int64_t& maxBatchSize) const { maxBatchSize = -1; auto isDynBatchWithUpperBound = [maxBatchSize](const ov::PartialShape& shape) -> bool { if (shape.rank().is_dynamic()) { @@ -401,20 +403,20 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptrget_type_name()); - if (!one_of(type, Input, - Output, - Convolution, - Deconvolution, - Lrn, - Pooling, - FullyConnected, - MatMul, - Softmax, - Split, - Concatenation, - Eltwise, - Reshape, - Tile)) { + if (!one_of(type, Type::Input, + Type::Output, + Type::Convolution, + Type::Deconvolution, + Type::Lrn, + Type::Pooling, + Type::FullyConnected, + Type::MatMul, + Type::Softmax, + Type::Split, + Type::Concatenation, + Type::Eltwise, + Type::Reshape, + Type::Tile)) { return false; } @@ -424,7 +426,7 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr(op->get_input_node_shared_ptr(1)); const auto tile = std::dynamic_pointer_cast(op); if (!(tile && repeatsNode && repeatsNode->cast_vector()[0] == 1)) { @@ -432,7 +434,7 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptrget_input_partial_shape(0); const auto outShape = op->get_output_partial_shape(0); if (isDynBatchWithUpperBound(inShape) && isDynBatchWithUpperBound(outShape)) { @@ -452,34 +454,34 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptr(op->get_input_node_shared_ptr(1)); if (!axis || axis->cast_vector()[0] == 0) { return false; } } - if (type == Concatenation) { + if (type == Type::Concatenation) { const auto concat = std::dynamic_pointer_cast(op); if (!concat || concat->get_axis() == 0) { return false; } } - if (type == Softmax) { + if (type == Type::Softmax) { const auto softmax = std::dynamic_pointer_cast(op); if (!softmax || softmax->get_axis() == 0) { return false; } } - if ((type == MatMul || type == FullyConnected) && + if ((type == Type::MatMul || type == Type::FullyConnected) && (op->get_input_node_ptr(1)->get_type_info() != ngraph::op::Constant::get_type_info_static() || op->get_input_partial_shape(0).rank().get_length() < 2)) { return false; } - if (type == Eltwise && std::dynamic_pointer_cast(op) && + if (type == Type::Eltwise && std::dynamic_pointer_cast(op) && !(op->get_input_node_ptr(0)->get_type_info() == ngraph::op::Constant::get_type_info_static() || op->get_input_node_ptr(1)->get_type_info() == ngraph::op::Constant::get_type_info_static()) && op->get_input_partial_shape(0).rank().get_length() != op->get_input_partial_shape(1).rank().get_length()) { @@ -489,7 +491,7 @@ bool MKLDNNExecNetwork::canBeExecViaLegacyDynBatch(std::shared_ptrget_ordered_ops(); for (const auto& op : ops) { auto type = TypeFromName(op->get_type_name()); - if (type == Tile) { + if (type == Type::Tile) { const auto repeatsNode = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1)); if (!repeatsNode) return false; @@ -512,23 +514,23 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &ne continue; } - if (type == Reshape) { + if (type == Type::Reshape) { if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0]) continue; } - if (type != Input && - type != Output && - type != Convolution && - type != Deconvolution && - type != Lrn && - type != Pooling && - type != FullyConnected && - 
type != MatMul && - type != Softmax && - type != Split && - type != Concatenation && - type != Eltwise) { + if (type != Type::Input && + type != Type::Output && + type != Type::Convolution && + type != Type::Deconvolution && + type != Type::Lrn && + type != Type::Pooling && + type != Type::FullyConnected && + type != Type::MatMul && + type != Type::Softmax && + type != Type::Split && + type != Type::Concatenation && + type != Type::Eltwise) { return false; } } @@ -536,7 +538,10 @@ bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &ne return true; } -void MKLDNNExecNetwork::Export(std::ostream& modelStream) { +void ExecNetwork::Export(std::ostream& modelStream) { CNNNetworkSerializer serializer(modelStream, extensionManager); serializer <<_network; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/exec_network.h b/src/plugins/intel_cpu/src/exec_network.h index c1330a1b538..7931ce8df9a 100644 --- a/src/plugins/intel_cpu/src/exec_network.h +++ b/src/plugins/intel_cpu/src/exec_network.h @@ -20,9 +20,9 @@ namespace ov { namespace intel_cpu { -class MKLDNNExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault { +class ExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefault { public: - typedef std::shared_ptr Ptr; + typedef std::shared_ptr Ptr; std::shared_ptr CreateInferRequestImpl(const std::vector>& inputs, @@ -34,9 +34,9 @@ public: InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override; - MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg, - const MKLDNNExtensionManager::Ptr &extMgr, NumaNodesWeights &weightsSharing, - const std::shared_ptr& plugin); + ExecNetwork(const InferenceEngine::CNNNetwork &network, const Config &cfg, + const ExtensionManager::Ptr &extMgr, NumaNodesWeights &weightsSharing, + const std::shared_ptr& plugin); void setProperty(const std::map &properties); @@ -49,31 +49,31 @@ public: void Export(std::ostream& modelStream) override; protected: - friend class MKLDNNInferRequestBase; - MKLDNNExtensionManager::Ptr extensionManager; + friend class InferRequestBase; + ExtensionManager::Ptr extensionManager; std::vector memoryStates; const InferenceEngine::CNNNetwork _network; mutable std::mutex _cfgMutex; Config _cfg; std::atomic_int _numRequests = {0}; std::string _name; - struct Graph : public MKLDNNGraph { + struct GraphGuard : public Graph { std::mutex _mutex; struct Lock : public std::unique_lock { - explicit Lock(Graph& graph) : std::unique_lock(graph._mutex), _graph(graph) {} - Graph& _graph; + explicit Lock(GraphGuard& graph) : std::unique_lock(graph._mutex), _graph(graph) {} + GraphGuard& _graph; }; }; // WARNING: Do not use _graphs directly. - mutable std::deque _graphs; + mutable std::deque _graphs; NumaNodesWeights& _numaNodesWeights; /* WARNING: Use GetGraph() function to get access to graph in current stream. 
* NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs * even from main thread */ - Graph::Lock GetGraph() const; + GraphGuard::Lock GetGraph() const; bool canBeExecViaLegacyDynBatch(std::shared_ptr function, int64_t& maxBatchSize) const; bool CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const; @@ -82,7 +82,7 @@ protected: InferenceEngine::Parameter GetConfigLegacy(const std::string &name) const; - InferenceEngine::Parameter GetMetricLegacy(const std::string &name, const Graph& graph) const; + InferenceEngine::Parameter GetMetricLegacy(const std::string &name, const GraphGuard& graph) const; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index 03dae940efc..89aa77f0783 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -18,19 +18,19 @@ namespace ov { namespace intel_cpu { -void MKLDNNExtension::GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept { +void Extension::GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept { static const InferenceEngine::Version version = { {1, 0}, // extension API version "1.0", - "MKLDNNExtension" // extension description message + "Extension" // extension description message }; versionInfo = &version; } -void MKLDNNExtension::Unload() noexcept {} +void Extension::Unload() noexcept {} -std::map MKLDNNExtension::getOpSets() { +std::map Extension::getOpSets() { auto cpu_plugin_opset = []() { ngraph::OpSet opset; @@ -119,11 +119,11 @@ std::map MKLDNNExtension::getOpSets() { return opsets; } -std::vector MKLDNNExtension::getImplTypes(const std::shared_ptr&) { +std::vector Extension::getImplTypes(const std::shared_ptr&) { return {}; } -InferenceEngine::ILayerImpl::Ptr MKLDNNExtension::getImplementation(const std::shared_ptr& node, const std::string& implType) { +InferenceEngine::ILayerImpl::Ptr Extension::getImplementation(const std::shared_ptr& node, const std::string& implType) { return nullptr; } @@ -131,4 +131,4 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtension::getImplementation(const std::s } // namespace ov // Generate exported function -IE_DEFINE_EXTENSION_CREATE_FUNCTION(ov::intel_cpu::MKLDNNExtension) +IE_DEFINE_EXTENSION_CREATE_FUNCTION(ov::intel_cpu::Extension) diff --git a/src/plugins/intel_cpu/src/extension.h b/src/plugins/intel_cpu/src/extension.h index 428234f6193..108ba07c664 100644 --- a/src/plugins/intel_cpu/src/extension.h +++ b/src/plugins/intel_cpu/src/extension.h @@ -9,7 +9,7 @@ namespace ov { namespace intel_cpu { -class MKLDNNExtension : public InferenceEngine::IExtension { +class Extension : public InferenceEngine::IExtension { public: void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override; void Unload() noexcept override; diff --git a/src/plugins/intel_cpu/src/extension_mngr.cpp b/src/plugins/intel_cpu/src/extension_mngr.cpp index da6b8b38f83..c108fb1d44e 100644 --- a/src/plugins/intel_cpu/src/extension_mngr.cpp +++ b/src/plugins/intel_cpu/src/extension_mngr.cpp @@ -8,14 +8,16 @@ #include "extension_mngr.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -void MKLDNNExtensionManager::AddExtension(const IExtensionPtr& extension) { +namespace ov { +namespace intel_cpu { + +void ExtensionManager::AddExtension(const IExtensionPtr& extension) { _extensions.push_back(extension); } -InferenceEngine::ILayerImpl::Ptr 
MKLDNNExtensionManager::CreateImplementation(const std::shared_ptr& op) { +InferenceEngine::ILayerImpl::Ptr ExtensionManager::CreateImplementation(const std::shared_ptr& op) { if (!op) IE_THROW() << "Cannot get nGraph operation!"; for (const auto& ext : _extensions) { @@ -31,27 +33,9 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtensionManager::CreateImplementation(co return nullptr; } -std::shared_ptr MKLDNNExtensionManager::CreateExtensionFactory(const std::shared_ptr& op) { - std::shared_ptr factory; - for (auto& ext : _extensions) { - ResponseDesc responseDesc; - StatusCode rc = GENERAL_ERROR; - ILayerImplFactory* factory_ptr = nullptr; - if (auto mkldnnExt = dynamic_cast(ext.get())) - rc = mkldnnExt->getFactoryFor(factory_ptr, op, &responseDesc); - if (rc != OK) { - factory = nullptr; - continue; - } else { - factory.reset(factory_ptr); - } - if (factory) { - break; - } - } - return factory; -} - -const std::vector & MKLDNNExtensionManager::Extensions() const { +const std::vector & ExtensionManager::Extensions() const { return _extensions; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/extension_mngr.h b/src/plugins/intel_cpu/src/extension_mngr.h index 0053e629a29..78ec7ef5ab1 100644 --- a/src/plugins/intel_cpu/src/extension_mngr.h +++ b/src/plugins/intel_cpu/src/extension_mngr.h @@ -8,17 +8,15 @@ #include #include #include -#include "nodes/list.hpp" namespace ov { namespace intel_cpu { -class MKLDNNExtensionManager { +class ExtensionManager { public: - using Ptr = std::shared_ptr; - MKLDNNExtensionManager() = default; + using Ptr = std::shared_ptr; + ExtensionManager() = default; InferenceEngine::ILayerImpl::Ptr CreateImplementation(const std::shared_ptr& op); - std::shared_ptr CreateExtensionFactory(const std::shared_ptr& op); void AddExtension(const InferenceEngine::IExtensionPtr& extension); const std::vector & Extensions() const; @@ -28,4 +26,3 @@ private: } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 21788567bf9..4ad70ceaae3 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -17,7 +17,7 @@ #include "graph.h" #include "graph_dumper.h" #include "graph_optimizer.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include "extension_mngr.h" #include "memory_solver.hpp" #include "itt.h" @@ -51,19 +51,21 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace InferenceEngine::details; -typedef std::unordered_set edge_cluster_t; +namespace ov { +namespace intel_cpu { + +typedef std::unordered_set edge_cluster_t; typedef std::vector edge_clusters_t; -mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0); +mkldnn::engine Graph::eng(mkldnn::engine::kind::cpu, 0); template -void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMgr, - MKLDNNWeightsSharing::Ptr &w_cache) { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::intel_cpu::itt::domains::intel_cpu_LT, "CreateGraph"); +void Graph::CreateGraph(NET &net, const ExtensionManager::Ptr& extMgr, + WeightsSharing::Ptr &w_cache) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "CreateGraph"); if (IsReady()) ForgetGraphData(); @@ -80,9 +82,9 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg CPU_DEBUG_CAP_ENABLE(serialize(*this)); } -void MKLDNNGraph::CreateGraph(const std::vector &graphNodes, 
- const std::vector &graphEdges, - MKLDNNWeightsSharing::Ptr &w_cache, +void Graph::CreateGraph(const std::vector &graphNodes, + const std::vector &graphEdges, + WeightsSharing::Ptr &w_cache, std::string name) { if (IsReady()) ForgetGraphData(); @@ -112,12 +114,12 @@ void MKLDNNGraph::CreateGraph(const std::vector &graphNodes, CPU_DEBUG_CAP_ENABLE(serialize(*this)); } -template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, - const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -template void MKLDNNGraph::CreateGraph(const CNNNetwork&, - const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); +template void Graph::CreateGraph(const std::shared_ptr&, + const ExtensionManager::Ptr&, WeightsSharing::Ptr&); +template void Graph::CreateGraph(const CNNNetwork&, + const ExtensionManager::Ptr&, WeightsSharing::Ptr&); -void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { +void Graph::Replicate(const std::shared_ptr &subgraph, const ExtensionManager::Ptr& extMgr) { this->_name = "subgraph"; this->reuse_io_tensors = false; @@ -125,7 +127,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, co ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(subgraph); // Map data object onto producer node - std::map, MKLDNNNodePtr> op2node; + std::map, NodePtr> op2node; // nodes which has no consumers (output or just unused). But doesn't marked as graph output. // Will be stored as fake output separately. @@ -143,7 +145,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, co }; for (const auto op : subgraph->get_ordered_ops()) { - const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)}; + const NodePtr node {Node::factory().create(op, getEngine(), extMgr, weightsCache)}; if (isQuantized()) { node->setQuantizedGraphFlag(true); } @@ -168,12 +170,12 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, co auto parentOp = op->get_input_node_shared_ptr(port); auto parentNode = op2node[parentOp]; - MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast(port))); + EdgePtr edge(new Edge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast(port))); node->addEdge(edge); graphEdges.push_back(edge); } - if (!ov::intel_cpu::one_of(op->get_type_info(), + if (!one_of(op->get_type_info(), ngraph::op::v0::Result::get_type_info_static(), ngraph::op::v3::Assign::get_type_info_static(), ngraph::op::v6::Assign::get_type_info_static())) { @@ -190,18 +192,18 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, co auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; const auto port = unusedOutput.get_index(); const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], + const NodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); - MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); + EdgePtr edge(new Edge(parentNode, outNode, port, 0)); outNode->addEdge(edge); graphEdges.push_back(edge); graphNodes.push_back(outNode); } } -void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "MKLDNNGraph::Replicate", 
"CNNNetwork"); +void Graph::Replicate(const CNNNetwork &network, const ExtensionManager::Ptr& extMgr) { + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "CNNNetwork"); InputsDataMap inputsInfo = network.getInputsInfo(); OutputsDataMap outputsInfo = network.getOutputsInfo(); @@ -237,7 +239,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana auto orderedOps = func->get_ordered_ops(); // TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ngraph::Node - std::map, MKLDNNNodePtr> op2node; + std::map, NodePtr> op2node; std::deque> unusedOutputs; // nodes which has no consumers (output or just unused) auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, @@ -255,7 +257,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana // Replicate All Nodes in topological order for (const auto& op : orderedOps) { - const MKLDNNNodePtr node(MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)); + const NodePtr node(Node::factory().create(op, getEngine(), extMgr, weightsCache)); if (isQuantized()) { node->setQuantizedGraphFlag(true); } @@ -287,12 +289,12 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana auto parentOp = op->get_input_node_shared_ptr(port); auto parentNode = op2node[parentOp]; - MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast(port))); + EdgePtr edge(new Edge(parentNode, node, getParentOutputPort(op, parentOp, port), static_cast(port))); node->addEdge(edge); graphEdges.push_back(edge); } - if (!ov::intel_cpu::one_of(op->get_type_info(), + if (!one_of(op->get_type_info(), ngraph::op::v0::Result::get_type_info_static(), ngraph::op::v3::Assign::get_type_info_static(), ngraph::op::v6::Assign::get_type_info_static())) { @@ -309,10 +311,10 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana auto parentNode = op2node[unusedOutput.get_node_shared_ptr()]; const auto port = unusedOutput.get_index(); const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); - const MKLDNNNodePtr outNode = std::make_shared(parentNode->outputShapes[port], + const NodePtr outNode = std::make_shared(parentNode->outputShapes[port], parentNode->getOriginalOutputPrecisionAtPort(port), nodeName, "Result", getEngine(), weightsCache); - MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); + EdgePtr edge(new Edge(parentNode, outNode, port, 0)); outNode->addEdge(edge); graphEdges.push_back(edge); graphNodes.push_back(outNode); @@ -321,10 +323,10 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana if (config.enforceBF16) EnforceBF16(); - auto hasSubgraphConsumers = [] (const MKLDNNNodePtr& node) -> bool { + auto hasSubgraphConsumers = [] (const NodePtr& node) -> bool { const auto & childEdges = node->getChildEdges(); return std::any_of(childEdges.begin(), childEdges.end(), - [] (const MKLDNNEdgeWeakPtr& edge) -> bool { + [] (const EdgeWeakPtr& edge) -> bool { auto edgePtr = edge.lock(); if (!edgePtr) return false; @@ -372,8 +374,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana } } -void MKLDNNGraph::InitGraph() { - MKLDNNGraphOptimizer optimizer; +void Graph::InitGraph() { + GraphOptimizer optimizer; SortTopologically(); InitNodes(); @@ -404,19 +406,19 @@ void 
MKLDNNGraph::InitGraph() { ExecuteConstantNodesOnly(); } -void MKLDNNGraph::InitNodes() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::InitNodes"); +void Graph::InitNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::InitNodes"); for (auto &node : graphNodes) { node->init(); } } -void MKLDNNGraph::InitDescriptors() { - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, ov::intel_cpu::itt::domains::intel_cpu_LT, "InitDescriptors", "Prepare"); +void Graph::InitDescriptors() { + OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "InitDescriptors", "Prepare"); for (auto &node : graphNodes) { - if (node->getType() == Input && _normalizePreprocMap.find(node->getName()) != _normalizePreprocMap.end()) { - auto *inputNode = dynamic_cast(node.get()); + if (node->getType() == Type::Input && _normalizePreprocMap.find(node->getName()) != _normalizePreprocMap.end()) { + auto *inputNode = dynamic_cast(node.get()); if (inputNode) inputNode->withMeanImage(); } @@ -436,16 +438,16 @@ void MKLDNNGraph::InitDescriptors() { } } -void MKLDNNGraph::InitOptimalPrimitiveDescriptors() { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "MKLDNNGraph::InitOptimalPrimitiveDescriptors"); +void Graph::InitOptimalPrimitiveDescriptors() { + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::InitOptimalPrimitiveDescriptors"); for (auto &node : graphNodes) { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, node->profiling.initOptimalPrimitiveDescriptor); node->initOptimalPrimitiveDescriptor(); } } -void MKLDNNGraph::ExtractConstantAndExecutableNodes() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::ExtractConstantAndExecutableNodes"); +void Graph::ExtractConstantAndExecutableNodes() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExtractConstantAndExecutableNodes"); for (const auto& graphNode : graphNodes) { if (graphNode->isConstant()) { constantGraphNodes.emplace_back(graphNode); @@ -460,13 +462,13 @@ void MKLDNNGraph::ExtractConstantAndExecutableNodes() { } } -void MKLDNNGraph::ExecuteConstantNodesOnly() const { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::ExecuteConstantNodesOnly"); +void Graph::ExecuteConstantNodesOnly() const { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::ExecuteConstantNodesOnly"); mkldnn::stream stream(eng); - using shared_memory_ptr = MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr; + using shared_memory_ptr = WeightsSharing::SharedMemory::Ptr; - auto acquireSharedOutputs = [this](const MKLDNNNodePtr & node) { + auto acquireSharedOutputs = [this](const NodePtr & node) { std::vector outputs; bool hasLocalAllocatedEdges = false; bool hasExternalInvalidEdges = false; @@ -523,8 +525,8 @@ static bool isReorderAvailable(const MemoryDescPtr& parentDesc, const MemoryDesc return mkldnn_success == status; } -void MKLDNNGraph::InitEdges() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::InitEdges"); +void Graph::InitEdges() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::InitEdges"); size_t numberOfEdges = graphEdges.size(); @@ -533,9 +535,9 @@ void MKLDNNGraph::InitEdges() { uniqueLayerNames.insert(node->getName()); } - auto insertReorder = [&](MKLDNNEdgePtr& edge, bool isOptimized) { + auto insertReorder = [&](EdgePtr& edge, bool isOptimized) { std::string basicLayerName = edge->getParent()->getName() + "_" + - MKLDNNReorderNode::getReorderArgs(edge->getInputDesc(), 
edge->getOutputDesc()) + "_" + + node::Reorder::getReorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" + edge->getChild()->getName(); std::string layerName = basicLayerName; int idx = 0; @@ -558,8 +560,8 @@ void MKLDNNGraph::InitEdges() { for (auto i = 0; i < numberOfEdges; i++) { auto edge = graphEdges[i]; auto reorderStatus = graphEdges[i]->needReorder(); - if (reorderStatus == MKLDNNEdge::ReorderStatus::Regular) { - MKLDNNEdge::ReorderStatus reorderStatusInternal = MKLDNNEdge::ReorderStatus::Regular; + if (reorderStatus == Edge::ReorderStatus::Regular) { + Edge::ReorderStatus reorderStatusInternal = Edge::ReorderStatus::Regular; // Check if there is a reorder that needs the precision conversion if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && !isReorderAvailable(edge->getInputPortDesc()->getMemDesc(), @@ -572,33 +574,33 @@ void MKLDNNGraph::InitEdges() { std::string convertName = edge->getParent()->getName() + "_" + inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name(); - auto convertNode = std::make_shared(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(), + auto convertNode = std::make_shared(inDesc.getShape(), inDesc.getPrecision(), outDesc.getPrecision(), convertName, this->getEngine(), this->weightsCache); convertNode->setDescs(inDesc, outDesc); InsertNode(edge, convertNode, true); //Check if reorder is still needed reorderStatusInternal = convertNode->getChildEdgeAt(0)->needReorder(); - if (reorderStatusInternal != MKLDNNEdge::ReorderStatus::No) + if (reorderStatusInternal != Edge::ReorderStatus::No) edge = convertNode->getChildEdgeAt(0); } - if (reorderStatusInternal != MKLDNNEdge::ReorderStatus::No) { - insertReorder(edge, reorderStatusInternal == MKLDNNEdge::ReorderStatus::Optimized); + if (reorderStatusInternal != Edge::ReorderStatus::No) { + insertReorder(edge, reorderStatusInternal == Edge::ReorderStatus::Optimized); } updateEdge(i); - } else if (reorderStatus == MKLDNNEdge::ReorderStatus::Optimized) { + } else if (reorderStatus == Edge::ReorderStatus::Optimized) { insertReorder(edge, true); updateEdge(i); } } } -static inline bool isConstOutput(MKLDNNEdgePtr edge) { +static inline bool isConstOutput(EdgePtr edge) { return edge->getParent()->isConstant() && !edge->getChild()->isConstant(); } -static edge_clusters_t findEdgeClusters(const std::vector & graphEdges) { - typedef std::unordered_map edge_cluster_idx_map_t; +static edge_clusters_t findEdgeClusters(const std::vector & graphEdges) { + typedef std::unordered_map edge_cluster_idx_map_t; edge_clusters_t edge_clusters; edge_cluster_idx_map_t edge_cluster_indices; @@ -613,7 +615,7 @@ static edge_clusters_t findEdgeClusters(const std::vector & graph continue; // edge is visited size_t cluster_idx = edge_clusters.size(); - MKLDNNEdgePtr last_shared_edge = nullptr; + EdgePtr last_shared_edge = nullptr; //has_defined_max_path means all the edges on path from current to the actual shared edge //have defined max memory size so they can be added to the clusters and resolved by mem solver bool has_defined_max_path = true; @@ -654,7 +656,7 @@ static edge_clusters_t findEdgeClusters(const std::vector & graph return edge_clusters; } -void MKLDNNGraph::AllocateWithReuse() { +void Graph::AllocateWithReuse() { edge_clusters_t edge_clusters = findEdgeClusters(graphEdges); size_t edge_clusters_count = edge_clusters.size(); @@ -663,11 +665,11 @@ void MKLDNNGraph::AllocateWithReuse() { auto &cluster = edge_clusters[i]; bool erase = false; for (auto &edge : cluster) { 
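// Illustrative aside, not part of the patch: Graph::AllocateWithReuse() above
// groups edges into clusters, describes each cluster as a "box" (lifetime plus
// byte size), lets a memory solver place the boxes inside one shared workspace,
// and then gives every NeedAllocation edge an offset into that workspace rather
// than a separate allocation. Below is a simplified stand-in sketch with
// hypothetical names and a naive back-to-back layout instead of the real
// lifetime-aware solver.
#include <cstddef>
#include <vector>

struct Box { int start; int finish; std::size_t size; };  // lifetime + bytes

std::vector<std::size_t> assignOffsetsDemo(const std::vector<Box>& boxes,
                                            std::size_t alignment) {
    std::vector<std::size_t> offsets;
    std::size_t next = 0;
    for (const auto& b : boxes) {
        offsets.push_back(next);
        next += (b.size + alignment - 1) / alignment * alignment;  // round up to alignment
    }
    return offsets;  // a real solver would also overlap boxes with disjoint lifetimes
}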
- if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation + if (edge->getStatus() == Edge::Status::NeedAllocation && edge->getParent()->isConstant()) { - if (edge->getParent()->getType() == Input) { - auto constNode = std::static_pointer_cast(edge->getParent()); - edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); + if (edge->getParent()->getType() == Type::Input) { + auto constNode = std::static_pointer_cast(edge->getParent()); + edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); } else { edge->externalAllocate(weightsCache); } @@ -711,8 +713,8 @@ void MKLDNNGraph::AllocateWithReuse() { bool isConst = false, isOutput = false, isInput = false; for (auto &edge : edge_clusters[i]) { isConst |= isConstOutput(edge); - isOutput |= edge->getChild()->getType() == Output; - isInput |= edge->getParent()->getType() == Input; + isOutput |= edge->getChild()->getType() == Type::Output; + isInput |= edge->getParent()->getType() == Type::Input; } if (reuse_io_tensors) { @@ -731,7 +733,7 @@ void MKLDNNGraph::AllocateWithReuse() { MemorySolver memSolver(boxes); size_t total_size = static_cast(memSolver.solve()) * alignment; - memWorkspace = std::make_shared(eng); + memWorkspace = std::make_shared(eng); memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); if (edge_clusters.empty()) @@ -742,7 +744,7 @@ void MKLDNNGraph::AllocateWithReuse() { for (int i = 0; i < edge_clusters.size(); i++) { int count = 0; for (auto &edge : edge_clusters[i]) { - if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation) { + if (edge->getStatus() == Edge::Status::NeedAllocation) { int64_t offset = memSolver.getOffset(i); // !! Fallback to individual memory allocation !! // if you like to check infer without reuse just call this function without arguments. @@ -751,7 +753,7 @@ void MKLDNNGraph::AllocateWithReuse() { // TODO: WA for some tests (like strided_slice_test) which use tensors with // shapes {0}. And it is implicitly converted into {1} tensor. // Zeroing of input data allows the tests to pass. - if (edge->getParent()->type == Input && edge->hasDefinedMaxSize()) + if (edge->getParent()->type == Type::Input && edge->hasDefinedMaxSize()) edge->getMemoryPtr()->FillZero(); count++; @@ -761,8 +763,8 @@ void MKLDNNGraph::AllocateWithReuse() { } } -void MKLDNNGraph::Allocate() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::Allocate"); +void Graph::Allocate() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::Allocate"); // resolve edges. Define which will be a view on others // NeedAllocation - real blob @@ -782,15 +784,15 @@ void MKLDNNGraph::Allocate() { for (auto& edge : graphEdges) edge->validate(); } -void MKLDNNGraph::CreatePrimitives() { - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "MKLDNNGraph::CreatePrimitives"); +void Graph::CreatePrimitives() { + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Graph::CreatePrimitives"); for (auto& node : graphNodes) { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, node->profiling.createPrimitive); node->createPrimitive(); } } -void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { +void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { if (!IsReady()) IE_THROW()<< "Wrong state.
Topology not ready."; auto input = inputNodesMap.find(name); @@ -806,7 +808,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: if (ext_data_ptr != inter_data_ptr) { auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); - MKLDNNMemory ext_mem(eng); + Memory ext_mem(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); // branch for handling dynamic batch feature in new API @@ -814,7 +816,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: auto newDims = childEdge->getMemory().getStaticDims(); newDims[0] = ext_mem.getStaticDims()[0]; - MKLDNNMemory tmpMem(eng); + Memory tmpMem(eng); auto newDesc = childEdge->getMemory().getDesc().cloneWithNewDims(newDims, true); tmpMem.Create(newDesc, childEdge->getMemory().GetData(), false); @@ -838,7 +840,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: } } -void MKLDNNGraph::PullOutputData(BlobMap &out) { +void Graph::PullOutputData(BlobMap &out) { if (!IsReady()) IE_THROW() << "Wrong state. Topology not ready."; @@ -846,7 +848,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { auto name = outputMap.first; auto node = outputMap.second; auto parentEdge = node->getParentEdgeAt(0); - const MKLDNNMemory& intr_blob = parentEdge->getMemory(); + const Memory& intr_blob = parentEdge->getMemory(); const auto ext_blob_map = out.find(name); const auto ext_blob = ext_blob_map->second; @@ -907,7 +909,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { auto outBlobDesc = expectedDesc.getLayout() == InferenceEngine::Layout::ANY ? DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims())) : MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); - MKLDNNMemory outBloMem(eng); + Memory outBloMem(eng); outBloMem.Create(outBlobDesc, ext_blob_ptr, false); // branch for handling dynamic batch feature in new API @@ -915,7 +917,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { auto newDims = intr_blob.getStaticDims(); newDims[0] = outBloMem.getStaticDims()[0]; - MKLDNNMemory tmpMem(eng); + Memory tmpMem(eng); auto newDesc = intr_blob.getDesc().cloneWithNewDims(newDims, true); tmpMem.Create(newDesc, intr_blob.GetData(), false); @@ -940,7 +942,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { } } -inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::stream& stream) const { +inline void Graph::ExecuteNode(const NodePtr& node, const mkldnn::stream& stream) const { DUMP(node, config, infer_count); OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute); @@ -951,7 +953,7 @@ inline void MKLDNNGraph::ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::st } } -void MKLDNNGraph::Infer(MKLDNNInferRequestBase* request) { +void Graph::Infer(InferRequestBase* request) { if (!IsReady()) { IE_THROW() << "Wrong state. 
Topology is not ready."; } @@ -970,7 +972,7 @@ void MKLDNNGraph::Infer(MKLDNNInferRequestBase* request) { if (infer_count != -1) infer_count++; } -void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector& sortedNodes) { +void Graph::VisitNode(NodePtr node, std::vector& sortedNodes) { if (node->temporary) { return; } @@ -991,14 +993,14 @@ void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector& sort sortedNodes.insert(sortedNodes.begin(), node); } -void MKLDNNGraph::SortTopologically() { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraph::SortTopologically"); +void Graph::SortTopologically() { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::SortTopologically"); - std::vector unsorted; - std::vector sorted; + std::vector unsorted; + std::vector sorted; for (int i = 0; i < graphNodes.size(); i++) { - MKLDNNNodePtr node = graphNodes[i]; + NodePtr node = graphNodes[i]; node->permanent = false; node->temporary = false; @@ -1007,7 +1009,7 @@ void MKLDNNGraph::SortTopologically() { } while (!unsorted.empty()) { - MKLDNNNodePtr node = unsorted.at(0); + NodePtr node = unsorted.at(0); unsorted.erase(unsorted.begin()); VisitNode(node, sorted); @@ -1027,7 +1029,7 @@ void MKLDNNGraph::SortTopologically() { for (auto &node : graphNodes) { { int port_num = node->inputShapes.size(); - std::vector res(port_num); + std::vector res(port_num); for (int i = 0; i < node->parentEdges.size(); i++) { auto edge = node->getParentEdgeAt(i); @@ -1041,7 +1043,7 @@ void MKLDNNGraph::SortTopologically() { } { int port_num = node->outputShapes.size(); - std::vector res(port_num); + std::vector res(port_num); for (int i = 0; i < node->childEdges.size(); i++) { auto edge = node->getChildEdgeAt(i); @@ -1056,10 +1058,10 @@ void MKLDNNGraph::SortTopologically() { } } -void MKLDNNGraph::GetPerfData(std::map &perfMap) const { +void Graph::GetPerfData(std::map &perfMap) const { unsigned i = 0; - std::function &, const MKLDNNNodePtr&)> - getPerfMapFor = [&](std::map &perfMap, const MKLDNNNodePtr& node) { + std::function &, const NodePtr&)> + getPerfMapFor = [&](std::map &perfMap, const NodePtr& node) { InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()]; pc.execution_index = i++; // TODO: Why time counter is signed? 
@@ -1088,23 +1090,23 @@ void MKLDNNGraph::GetPerfData(std::map& properties) { +void Graph::setProperty(const std::map& properties) { config.readProperties(properties); } -Config MKLDNNGraph::getProperty() const { +Config Graph::getProperty() const { return config; } -void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) { +void Graph::RemoveEdge(EdgePtr& edge) { for (auto it = graphEdges.begin(); it != graphEdges.end(); it++) { if ((*it) == edge) { edge->drop(); @@ -1114,7 +1116,7 @@ void MKLDNNGraph::RemoveEdge(MKLDNNEdgePtr& edge) { } } -void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { +void Graph::DropNode(const NodePtr &node) { auto children = node->childEdges; auto parents = node->parentEdges; @@ -1131,7 +1133,7 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { if (!child) continue; - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -1145,14 +1147,14 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) { remEdge->drop(); RemoveEdge(remEdge); } - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, child, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); } } } -void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { +void Graph::DropDWConvNode(const NodePtr &node) { auto children = node->childEdges; auto parents = node->parentEdges; @@ -1176,7 +1178,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { if (!child) continue; - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -1190,7 +1192,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { remEdge->drop(); RemoveEdge(remEdge); } - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, child, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); } @@ -1202,7 +1204,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { auto parent = p_edge->getParent(); if (!parent) continue; - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; int portCandidate = 0; if (remEdge) { @@ -1213,7 +1215,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { } int outNum = parentConv->parentEdges.size(); - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentConv, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, parentConv, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); parentConv->inputShapes.push_back(node->getInputShapeAtPort(portCandidate)); @@ -1221,7 +1223,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { parentConv->outputShapes[0] = node->getOutputShapeAtPort(0); } -void MKLDNNGraph::RemoveDroppedNodes() { +void Graph::RemoveDroppedNodes() { auto& nodes = this->GetNodes(); auto it = nodes.begin(); @@ -1235,7 +1237,7 @@ void MKLDNNGraph::RemoveDroppedNodes() { } } -void MKLDNNGraph::RemoveDroppedEdges() { +void Graph::RemoveDroppedEdges() { auto& edges = this->GetEdges(); auto it = edges.begin(); @@ -1249,20 +1251,20 @@ void MKLDNNGraph::RemoveDroppedEdges() { } } -MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc, +NodePtr Graph::InsertReorder(EdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc, bool isOptimized) { - MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), 
weightsCache)); - auto *reorderPtr = dynamic_cast(newReorder.get()); + NodePtr newReorder(new node::Reorder(layerName, getEngine(), weightsCache)); + auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { - IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode"; + IE_THROW() << "Graph::InsertReorder: Cannot cast to Reorder"; } reorderPtr->setDescs(inDesc, outDesc); reorderPtr->setOptimized(isOptimized); InsertNode(edge, newReorder, true); - // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal. - // Due to the specificity of MKLDNNGraphOptimizer::MergeTransposeAndReorder() that isOptimized flag uses, we shouldn't do these checks. + // Using the method Edge::getDesc() we can check that input and output tensor descriptors are equal. + // Due to the specificity of GraphOptimizer::MergeTransposeAndReorder() that isOptimized flag uses, we shouldn't do these checks. if (!isOptimized) { newReorder->getParentEdgeAt(0)->getDesc(); newReorder->getChildEdgeAt(0)->getDesc(); @@ -1271,7 +1273,7 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa return newReorder; } -bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode) { +bool Graph::InsertNode(EdgePtr edge, NodePtr node, bool initNode) { auto oIndex = edge->getOutputNum(); auto iIndex = edge->getInputNum(); if (iIndex < 0 || oIndex < 0) @@ -1284,9 +1286,9 @@ bool MKLDNNGraph::InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNo return InsertNode(edge->getParent(), edge->getChild(), node, iIndex, oIndex, initNode); } -bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode) { - MKLDNNEdgePtr beforeNode(new MKLDNNEdge(parent, node, parentPort, 0)); - MKLDNNEdgePtr afterNode(new MKLDNNEdge(node, child, 0, childPort)); +bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode) { + EdgePtr beforeNode(new Edge(parent, node, parentPort, 0)); + EdgePtr afterNode(new Edge(node, child, 0, childPort)); // Add edge for beforeNode beforeNode->getChild()->parentEdges.push_back(beforeNode); @@ -1316,29 +1318,28 @@ bool MKLDNNGraph::InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNo } // Set all non const data paths precision to BF16 -void MKLDNNGraph::EnforceBF16() { +void Graph::EnforceBF16() { // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default if (!implication(isQuantized(), config.manualEnforceBF16)) return; - /* list of node types that must be forced to be executed in BF16 precision - * because of performance gains */ - static const std::unordered_set> significantNodes { // std::hash is necessary old compilers (defect in C++11 standart) - Convolution, // conv nets - FullyConnected, // conv / bert nets - RNNCell, // recurent nets - RNNSeq, // recurent nets - MatMul, // bert nets - ROIPooling, // object detection nets - Interpolate, // super resolution nets - }; - std::function& skipNodes)> searchForNodesToSkip; - searchForNodesToSkip = [&](const MKLDNNNodePtr& node, std::unordered_set& skipNodes) -> void { + std::function& skipNodes)> searchForNodesToSkip; + searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set& skipNodes) -> void { for (size_t i = 0; i < 
node->getParentEdges().size(); i++) { const auto& parent = node->getParentEdgeAt(i)->getParent(); - if (significantNodes.count(parent->getType())) // stop at significant nodes - continue; + + /* list of node types that must be forced to be executed in BF16 precision + * because of performance gains */ + if (one_of(parent->getType(), + Type::Convolution, // conv nets + Type::FullyConnected, // conv / bert nets + Type::RNNCell, // recurrent nets + Type::RNNSeq, // recurrent nets + Type::MatMul, // bert nets + Type::ROIPooling, // object detection nets + Type::Interpolate)) // super resolution nets + continue; // stop at significant nodes const auto res = skipNodes.insert(parent); if (res.second) // node not visited yet @@ -1349,7 +1350,7 @@ void MKLDNNGraph::EnforceBF16() { /* Skip BF16 enforcement for tail of the graph by forming set of nodes to skip. * Necessary to maintain accuracy. * Experiments show zero performance impact on average */ - std::unordered_set nodesToSkip; + std::unordered_set nodesToSkip; // starting from output nodes for (const auto& entry : outputNodesMap) { const auto& node = entry.second; @@ -1360,15 +1361,17 @@ void MKLDNNGraph::EnforceBF16() { if (nodesToSkip.count(node) && !node->enforceBF16evenForGraphTail) continue; - if (node->getType() != Input && node->getType() != Output) { + if (node->getType() != Type::Input && node->getType() != Type::Output) { for (size_t i = 0; i < node->getOriginalInputsNumber(); i++) { const auto &parent = node->getParentEdgesAtPort(i)[0]->getParent(); /* Skip BF16 enforcement for nodes after Constant Inputs for maintaining precision for fusing. * Precision conversion to BF16 is done automatically, if convolution follows up after Constant Inputs * and if activation is BF16 */ - if (!(parent->getType() == Input && parent->isConstant() && - node->getType() != Concatenation) && // Concatenation node is exception because it doesn't change an accuracy for BF16 activation - !(parent->getType() == Input && node->getType() == Eltwise) && // exclude Eltwise after Input since it supports conversion to BF16 + if (!(parent->getType() == Type::Input && parent->isConstant() && + // Concatenation node is an exception because it doesn't change the accuracy for BF16 activation + node->getType() != Type::Concatenation) && + // exclude Eltwise after Input since it supports conversion to BF16 + !(parent->getType() == Type::Input && node->getType() == Type::Eltwise) && node->getOriginalInputPrecisionAtPort(i) == Precision::FP32) node->setOriginalInputPrecisionAtPort(i, Precision::BF16); } @@ -1381,6 +1384,9 @@ void MKLDNNGraph::EnforceBF16() { } } -std::shared_ptr MKLDNNGraph::dump() const { +std::shared_ptr Graph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 929af7db95b..65c76b70a74 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -20,18 +20,20 @@ namespace ov { namespace intel_cpu { -class MKLDNNInferRequestBase; -class MKLDNNGraph { +class InferRequestBase; +class InferRequest; + +class Graph { public: - typedef std::shared_ptr Ptr; - MKLDNNWeightsSharing::Ptr weightsCache; + typedef std::shared_ptr Ptr; + WeightsSharing::Ptr weightsCache; enum Status { NotReady = 0, Ready = 1, }; - MKLDNNGraph() = default; + Graph() = default; Status GetStatus() { return status; @@ -49,12 +51,12 @@ public: template void CreateGraph(NET &network, - const MKLDNNExtensionManager::Ptr& extMgr,
- MKLDNNWeightsSharing::Ptr &w_cache); + const ExtensionManager::Ptr& extMgr, + WeightsSharing::Ptr &w_cache); - void CreateGraph(const std::vector &graphNodes, - const std::vector &graphEdges, - MKLDNNWeightsSharing::Ptr &w_cache, + void CreateGraph(const std::vector &graphNodes, + const std::vector &graphEdges, + WeightsSharing::Ptr &w_cache, std::string name); bool hasMeanImageFor(const std::string& name) { @@ -64,13 +66,13 @@ public: void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in); void PullOutputData(InferenceEngine::BlobMap &out); - void Infer(MKLDNNInferRequestBase* request = nullptr); + void Infer(InferRequestBase* request = nullptr); - const std::vector& GetNodes() const { + const std::vector& GetNodes() const { return graphNodes; } - std::vector& GetNodes() { + std::vector& GetNodes() { return graphNodes; } @@ -78,26 +80,26 @@ public: return _name; } - std::vector& GetEdges() { + std::vector& GetEdges() { return graphEdges; } - std::map& GetInputNodesMap() { + std::map& GetInputNodesMap() { return inputNodesMap; } - std::map& GetOutputNodesMap() { + std::map& GetOutputNodesMap() { return outputNodesMap; } - MKLDNNNodePtr getInputNodeByName(const std::string &name) { + NodePtr getInputNodeByName(const std::string &name) { auto input = inputNodesMap.find(name); if (input == inputNodesMap.end()) IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name; return input->second; } - MKLDNNNodePtr getOutputNodeByName(const std::string &name) { + NodePtr getOutputNodeByName(const std::string &name) { auto output = outputNodesMap.find(name); if (output == outputNodesMap.end()) IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name; @@ -116,9 +118,9 @@ public: void RemoveDroppedNodes(); void RemoveDroppedEdges(); - void RemoveEdge(MKLDNNEdgePtr& edge); - void DropNode(const MKLDNNNodePtr& node); - void DropDWConvNode(const MKLDNNNodePtr& node); + void RemoveEdge(EdgePtr& edge); + void DropNode(const NodePtr& node); + void DropDWConvNode(const NodePtr& node); /** * @brief Insert Reorder node at the edge-specified location. @@ -139,11 +141,11 @@ public: * pointer to the blob containing scales * @return pointer to the new Reorder node. */ - MKLDNNNodePtr InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const MemoryDesc& inDesc, + NodePtr InsertReorder(EdgePtr edge, std::string layerName, const MemoryDesc& inDesc, const MemoryDesc& outDesc, bool isOptimized = false); /** - * @brief Insert MKLDNNNode at the edge-specified location. + * @brief Insert Node at the edge-specified location. * This method supports two regimes. First, the node is inserted without initialization (i.e. supported descriptors initialization, * supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the * node without initialization. @@ -155,10 +157,10 @@ public: * parameter that determines whether the node needs to be initialized * @return true in case of success, false otherwise. */ - bool InsertNode(MKLDNNEdgePtr edge, MKLDNNNodePtr node, bool initNode = false); + bool InsertNode(EdgePtr edge, NodePtr node, bool initNode = false); /** - * @brief Insert MKLDNNNode between two specified nodes. + * @brief Insert Node between two specified nodes. * This procedure creates two edges that link the parent and child nodes to the inserted one and adds all created objects to the graph. * This method supports two regimes. 
First, the node is inserted without initialization (i.e. supported descriptors initialization, * supported primitive descriptors selection, etc.), which can be useful after the InitEdges() completes. The second is just inserting the @@ -175,7 +177,7 @@ public: * parameter that determines whether the node needs to be initialized * @return true in case of success, false otherwise. */ - bool InsertNode(MKLDNNNodePtr parent, MKLDNNNodePtr child, MKLDNNNodePtr node, int parentPort, int childPort, bool initNode = false); + bool InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPort, int childPort, bool initNode = false); std::shared_ptr dump() const; @@ -192,7 +194,7 @@ public: } protected: - void VisitNode(MKLDNNNodePtr node, std::vector& sortedNodes); + void VisitNode(NodePtr node, std::vector& sortedNodes); void ForgetGraphData() { status = NotReady; @@ -213,10 +215,10 @@ protected: bool reuse_io_tensors = true; - MKLDNNMemoryPtr memWorkspace; + MemoryPtr memWorkspace; - std::vector graphNodes; - std::vector graphEdges; + std::vector graphNodes; + std::vector graphEdges; std::map _normalizePreprocMap; std::string _name; @@ -226,8 +228,8 @@ protected: static mkldnn::engine eng; - void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr); - void Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); + void Replicate(const InferenceEngine::CNNNetwork &network, const ExtensionManager::Ptr& extMgr); + void Replicate(const std::shared_ptr &subgraph, const ExtensionManager::Ptr& extMgr); void InitGraph(); void InitNodes(); void InitDescriptors(); @@ -237,24 +239,24 @@ protected: void AllocateWithReuse(); void CreatePrimitives(); void ExtractConstantAndExecutableNodes(); - void ExecuteNode(const MKLDNNNodePtr& node, const mkldnn::stream& stream) const; + void ExecuteNode(const NodePtr& node, const mkldnn::stream& stream) const; void ExecuteConstantNodesOnly() const; - friend class MKLDNNInferRequestBase; - friend class MKLDNNLegacyInferRequest; - friend class MKLDNNInferRequest; - friend std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); + friend class LegacyInferRequest; + friend class intel_cpu::InferRequest; + friend class intel_cpu::InferRequestBase; + friend std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); private: // TODO: change std::map to std::unordered_map - std::map inputNodesMap; - std::map outputNodesMap; + std::map inputNodesMap; + std::map outputNodesMap; // these node pointers (from graphNodes) are to avoid regular checking for // constantness of nodes in ExecuteConstantNodesOnly, Infer methods and calls of // non-executable (optimized out) nodes, such as Input, Reshape, etc. 
- std::vector constantGraphNodes; - std::vector executableGraphNodes; + std::vector constantGraphNodes; + std::vector executableGraphNodes; MultiCachePtr rtParamsCache; diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index 18fbab0d284..84de0b35a9e 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -24,18 +24,18 @@ using namespace InferenceEngine; namespace ov { namespace intel_cpu { -void serializeToCout(const MKLDNNGraph &graph); -void serializeToXML(const MKLDNNGraph &graph, const std::string& path); +void serializeToCout(const Graph &graph); +void serializeToXML(const Graph &graph, const std::string& path); namespace { -std::map extract_node_metadata(const MKLDNNNodePtr &node) { +std::map extract_node_metadata(const NodePtr &node) { std::map serialization_info; - if (node->getType() == Input && node->isConstant()) { + if (node->getType() == Type::Input && node->isConstant()) { // We need to separate Input and Const layers serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = "Const"; - } else if (node->getType() == Generic) { + } else if (node->getType() == Type::Generic) { // Path to print actual name for extension layers serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = node->getTypeStr(); } else { @@ -114,14 +114,14 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no } // namespace -std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) { - std::map > node2layer; +std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph) { + std::map > node2layer; ngraph::ResultVector results; ngraph::ParameterVector params; ngraph::NodeVector to_hold; - auto get_inputs = [&] (const MKLDNNNodePtr & node) { + auto get_inputs = [&] (const NodePtr & node) { auto pr_edges = node->getParentEdges(); ngraph::OutputVector inputs(pr_edges.size()); @@ -140,7 +140,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph return inputs; }; - auto create_ngraph_node = [&](const MKLDNNNodePtr &node) { + auto create_ngraph_node = [&](const NodePtr &node) { bool is_input = false, is_output = false, should_be_hold = false; for (auto && kvp : graph.inputNodesMap) { if (kvp.second == node) { @@ -209,7 +209,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph } #ifdef CPU_DEBUG_CAPS -void serialize(const MKLDNNGraph &graph) { +void serialize(const Graph &graph) { const std::string& path = graph.getConfig().execGraphPath; if (path.empty()) @@ -223,7 +223,7 @@ void serialize(const MKLDNNGraph &graph) { IE_THROW() << "Unknown serialize format. Should be either 'cout' or '*.xml'. 
Got " << path; } -void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { +void serializeToXML(const Graph &graph, const std::string& path) { if (path.empty()) return; @@ -235,7 +235,7 @@ void serializeToXML(const MKLDNNGraph &graph, const std::string& path) { manager.run_passes(graph.dump()); } -void serializeToCout(const MKLDNNGraph &graph) { +void serializeToCout(const Graph &graph) { for (const auto& node : graph.GetNodes()) { std::cout << "name: " << node->getName() << " [ "; auto nodeDesc = node->getSelectedPrimitiveDescriptor(); diff --git a/src/plugins/intel_cpu/src/graph_dumper.h b/src/plugins/intel_cpu/src/graph_dumper.h index 461e88e2f16..282417cad9b 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.h +++ b/src/plugins/intel_cpu/src/graph_dumper.h @@ -13,9 +13,9 @@ namespace ov { namespace intel_cpu { -std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); +std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); #ifdef CPU_DEBUG_CAPS -void serialize(const MKLDNNGraph &graph); +void serialize(const Graph &graph); #endif // CPU_DEBUG_CAPS } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index ec870c43291..115ca79da91 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -4,7 +4,7 @@ #include "graph_optimizer.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include "nodes/reshape.h" #include "nodes/pooling.h" #include "nodes/eltwise.h" @@ -52,12 +52,15 @@ #include "memory_desc/cpu_memory_desc_utils.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +using namespace ov::intel_cpu::node; -MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {} +namespace ov { +namespace intel_cpu { -void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { +GraphOptimizer::GraphOptimizer() {} + +void GraphOptimizer::ApplyCommonGraphOptimizations(Graph &graph) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "ApplyCommonGraphOptimizations", "FuseConvolutionAndBias"); FuseConvolutionMatMulAndBias(graph); graph.RemoveDroppedNodes(); @@ -154,8 +157,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { graph.RemoveDroppedEdges(); } -void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); +void GraphOptimizer::ApplyImplSpecificGraphOptimizations(Graph &graph) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "GraphOptimizer::ApplyImplSpecificGraphOptimizations"); DropDoubleReorders(graph); graph.RemoveDroppedNodes(); @@ -166,22 +169,24 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap graph.RemoveDroppedEdges(); } -void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) { +void GraphOptimizer::FuseConvolutionMatMulAndBias(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](const MKLDNNNodePtr& node) { - return (node->getType() == Convolution || node->getType() == MatMul) && + auto isSuitableParentNode = [](const NodePtr& node) { + return (node->getType() == Type::Convolution || node->getType() == Type::MatMul) && node->getChildEdges().size() == 1 && node->getParentEdges().size() == 2 && node->getFusedWith().empty(); }; - auto isSuitableChildNode = 
[&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) { - if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) + auto isSuitableChildNode = [&](const NodePtr& parentNode, const NodePtr& childNode) { + if (childNode->getAlgorithm() != Algorithm::EltwiseAdd + || !childNode->getFusedWith().empty() + || childNode->getParentEdges().size() != 2) return false; const auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent(); - if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) + if (biasNode->getType() != Type::Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) return false; const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims(); @@ -235,7 +240,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) { if (!child) continue; - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -247,13 +252,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) { outNum = remEdge->getOutputNum(); graph.RemoveEdge(remEdge); } - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, child, inNum, outNum)); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); } } else { - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -261,7 +266,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) { } const auto& parentEltwise = parentNode; - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + EdgePtr newEdge(new Edge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); auto& graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); @@ -280,17 +285,17 @@ void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseDeconvolutionAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - if (node->getType() != Deconvolution || node->getChildEdges().size() != 1) + auto isSuitableParentNode = [](NodePtr node) { + if (node->getType() != Type::Deconvolution || node->getChildEdges().size() != 1) return false; - const auto deconv = std::dynamic_pointer_cast(node); + const auto deconv = std::dynamic_pointer_cast(node); if (deconv == nullptr) IE_THROW() << "Cannot cast to deconvolution node " << node->getName(); - if (deconv->getAlgorithm() != DeconvolutionCommon) { + if (deconv->getAlgorithm() != Algorithm::DeconvolutionCommon) { return true; } @@ -324,7 +329,7 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Deconvolution) + if (p_edge->getParent()->getType() == Type::Deconvolution) continue; graph.RemoveEdge(p_edge); @@ -334,11 +339,11 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap } } -void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { +void GraphOptimizer::FuseMultiplyAndAdd(Graph &graph) { 
auto& graphNodes = graph.GetNodes(); - auto isSuitableSecondInput = [](const MKLDNNNodePtr& node, VectorDims dataDims) { - if (node->getType() != Input || !node->isConstant()) + auto isSuitableSecondInput = [](const NodePtr& node, VectorDims dataDims) { + if (node->getType() != Type::Input || !node->isConstant()) return false; const auto secondInputDims = node->getOutputShapeAtPort(0).getStaticDims(); if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) @@ -367,16 +372,16 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { return true; }; - auto isSuitableParentNode = [&](const MKLDNNNodePtr& node) { - if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() || + auto isSuitableParentNode = [&](const NodePtr& node) { + if (node->getAlgorithm() != Algorithm::EltwiseMultiply || !node->getFusedWith().empty() || node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) return false; return isSuitableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getInputShapeAtPort(0).getDims()); }; - auto isSuitableChildNode = [&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) { - if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) + auto isSuitableChildNode = [&](const NodePtr& parentNode, const NodePtr& childNode) { + if (childNode->getAlgorithm() != Algorithm::EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; return isSuitableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getInputShapeAtPort(0).getDims()) && @@ -414,7 +419,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { if (!child) continue; - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -428,13 +433,13 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { remEdge->drop(); graph.RemoveEdge(remEdge); } - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, child, inNum, outNum)); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); } } else { - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -443,7 +448,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { } auto& parentEltwise = parentNode; - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + EdgePtr newEdge(new Edge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); @@ -453,20 +458,20 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { } parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1)); - parentNode->setAlgorithm(EltwiseMulAdd); + parentNode->setAlgorithm(Algorithm::EltwiseMulAdd); parentNode->setTypeStr("MulAdd"); parentNode->addOriginalLayer(childNode->getOriginalLayers()); graph.DropNode(childNode); } } -void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { +void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableConvNode = [](MKLDNNNodePtr node) { + auto isSuitableConvNode = [](NodePtr node) { bool retVal = false; 
- if (node->getType() == Convolution) { - if (auto convNode = std::dynamic_pointer_cast(node)) { + if (node->getType() == Type::Convolution) { + if (auto convNode = std::dynamic_pointer_cast(node)) { auto rank = convNode->getInputShapeAtPort(0).getRank(); // int8 depthwise convolution does not support fusing zero points in 3D case if (implication(convNode->isDepthWise(), rank < 5)) { @@ -477,8 +482,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { return retVal; }; - auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0, MKLDNNNodePtr parent1) { - auto* convNode = dynamic_cast(node.get()); + auto initializeInputZeroPoints = [](NodePtr node, NodePtr parent0, NodePtr parent1) { + auto* convNode = dynamic_cast(node.get()); if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); @@ -489,14 +494,14 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { return false; } - if (parent0->getType() == Eltwise) { + if (parent0->getType() == Type::Eltwise) { if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) return false; // The plug-in doesn't support FP32 convolution with input/weights zero points. // In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) we cannot use // INT8 implementation so we have to disable input zero points fusing as well. - if (parent1->getType() != Input || !parent1->isConstant() || parent1->getOriginalOutputPrecisionAtPort(0) != Precision::I8) { + if (parent1->getType() != Type::Input || !parent1->isConstant() || parent1->getOriginalOutputPrecisionAtPort(0) != Precision::I8) { return false; } @@ -507,7 +512,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { return false; auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); - if (arg0->getType() == Input && arg0->isConstant()) { + if (arg0->getType() == Type::Input && arg0->isConstant()) { if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; @@ -528,7 +533,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (arg1->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; - auto zeroPointsConstant = dynamic_cast(arg0.get()); + auto zeroPointsConstant = dynamic_cast(arg0.get()); if (zeroPointsConstant == nullptr) IE_THROW() << "Cannot cast to Input node"; @@ -562,15 +567,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { return true; }; - auto initializeOutputCompensation = [](MKLDNNNodePtr node) { - auto* convNode = dynamic_cast(node.get()); + auto initializeOutputCompensation = [](NodePtr node) { + auto* convNode = dynamic_cast(node.get()); if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); if (convNode->inputZeroPoints.empty()) return; - auto weightsConstant = dynamic_cast(convNode->getParentEdgesAtPort(1)[0]->getParent().get()); + auto weightsConstant = dynamic_cast(convNode->getParentEdgesAtPort(1)[0]->getParent().get()); if (!weightsConstant || !weightsConstant->isConstant()) return; @@ -583,7 +588,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { IE_THROW() << "weightsBlob has not allocated buffer"; ptrdiff_t G = convNode->getGroupNum(); - const int groupOffset = convNode->getAlgorithm() == ConvolutionGrouped ? 1 : 0; + const int groupOffset = convNode->getAlgorithm() == Algorithm::ConvolutionGrouped ? 
1 : 0; auto& weightsConstantDims = weightsConstant->outputShapes[0].getStaticDims(); ptrdiff_t OC = weightsConstantDims[0 + groupOffset]; @@ -647,18 +652,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { * This check can already be removed for FC fusing, but should be kept for Convolution, * which still uses legacy depthwise postops for performance reasons. */ -static bool BF16QuantizeNodeFusing(const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) { - return childNode->getType() == FakeQuantize && +static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) { + return childNode->getType() == Type::FakeQuantize && one_of(Precision::BF16, parentNode->getOriginalOutputPrecisionAtPort(0), childNode->getOriginalOutputPrecisionAtPort(0)); } -void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1; + auto isSuitableParentNode = [](NodePtr node) { + return node->getType() == Type::FullyConnected && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); @@ -683,11 +688,11 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == FullyConnected) + if (p_edge->getParent()->getType() == Type::FullyConnected) continue; graph.RemoveEdge(p_edge); @@ -698,11 +703,11 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra } } -void MKLDNNGraphOptimizer::FuseMatMulAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseMatMulAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](const MKLDNNNodePtr& node) { - return node->getType() == MatMul && node->getChildEdges().size() == 1; + auto isSutableParentNode = [](const NodePtr& node) { + return node->getType() == Type::MatMul && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); @@ -721,11 +726,11 @@ void MKLDNNGraphOptimizer::FuseMatMulAndSimpleOperation(MKLDNNGraph &graph) { childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == MatMul) + if (p_edge->getParent()->getType() == Type::MatMul) continue; graph.RemoveEdge(p_edge); @@ -736,26 +741,26 @@ void MKLDNNGraphOptimizer::FuseMatMulAndSimpleOperation(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { +void GraphOptimizer::FuseConvolutionAndDWConvolution(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isConvolutionNode = [](const MKLDNNNodePtr &node) { - return node->getType() == Convolution; + auto isConvolutionNode = [](const NodePtr &node) { + return node->getType() == Type::Convolution; 
}; - auto is1x1Convolution = [](const std::shared_ptr &conv) { + auto is1x1Convolution = [](const std::shared_ptr &conv) { const auto weightRank = conv->getWeightDims().size(); return conv->getWeightDims()[weightRank - 1] == 1 && conv->getWeightDims()[weightRank - 2] == 1; }; - auto isSuitableParentConvolution = [&](MKLDNNNodePtr node) { + auto isSuitableParentConvolution = [&](NodePtr node) { if (node->isDropped()) return false; if (node->isDynamicNode()) return false; - const auto conv = std::dynamic_pointer_cast(node); + const auto conv = std::dynamic_pointer_cast(node); if (conv == nullptr) IE_THROW() << "Cannot cast to convolution node " << node->getName(); @@ -779,18 +784,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild()); }; - auto isSuitableChildConvolution = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { + auto isSuitableChildConvolution = [&](const NodePtr &parentNode, const NodePtr &childNode) { if (parentNode->isDropped() || childNode->isDropped()) return false; if (childNode->isDynamicNode()) return false; - const auto convChild = std::dynamic_pointer_cast(childNode); + const auto convChild = std::dynamic_pointer_cast(childNode); if (convChild == nullptr) IE_THROW() << "Cannot cast to convolution node " << childNode->getName(); - const auto convParent = std::dynamic_pointer_cast(parentNode); + const auto convParent = std::dynamic_pointer_cast(parentNode); if (convParent == nullptr) IE_THROW() << "Cannot cast to convolution node " << parentNode->getName(); @@ -830,7 +835,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { return isSupportedParams; }; - auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { + auto isFusingWorthwhile = [&](const NodePtr &parentNode, const NodePtr &childNode) { if (!childNode->inputShapes[0].isStatic() || !childNode->outputShapes[0].isStatic()) { return false; } @@ -843,7 +848,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize; int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize; - auto parentConvolutionNode = std::dynamic_pointer_cast(parentNode); + auto parentConvolutionNode = std::dynamic_pointer_cast(parentNode); if (parentConvolutionNode == nullptr) IE_THROW() << "Cannot get convolution node " << parentNode->getName(); @@ -876,11 +881,11 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { } // TODO [NM]: unite with FuseConvolutionAndSimpleOperation -void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph) { +void GraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1 && + auto isSuitableParentNode = [](NodePtr node) { + return (node->getType() == Type::Convolution || node->getType() == Type::BinaryConvolution) && node->getChildEdges().size() == 1 && node->getOriginalOutputPrecisionAtPort(0) == Precision::FP32; }; @@ -893,20 +898,32 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDN } auto childNode = 
parentNode->getChildEdgeAt(0)->getChild(); - if (childNode->getAlgorithm() != PoolingMax || childNode->getChildEdges().size() != 1) { + if (childNode->getAlgorithm() != Algorithm::PoolingMax || childNode->getChildEdges().size() != 1) { parent++; continue; } auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); - if (parentNode->getType() == BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { + if (parentNode->getType() == Type::BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { parent++; continue; } - if (!one_of(fuseCandidate->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, - EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, - EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu)) { + if (!one_of(fuseCandidate->getAlgorithm(), Algorithm::EltwiseRelu, + Algorithm::EltwiseGelu, + Algorithm::EltwiseElu, + Algorithm::EltwiseSigmoid, + Algorithm::EltwiseClamp, + Algorithm::EltwiseTanh, + Algorithm::EltwiseSwish, + Algorithm::EltwiseHswish, + Algorithm::EltwiseMish, + Algorithm::EltwiseHsigmoid, + Algorithm::EltwiseRoundHalfToEven, + Algorithm::EltwiseRoundHalfAwayFromZero, + Algorithm::EltwiseAbs, + Algorithm::EltwiseSqrt, + Algorithm::EltwiseSoftRelu)) { parent++; continue; } @@ -924,11 +941,11 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDN } } -void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1; + auto isSuitableParentNode = [](NodePtr node) { + return (node->getType() == Type::Convolution || node->getType() == Type::BinaryConvolution) && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); @@ -954,7 +971,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); @@ -969,11 +986,11 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) } } -void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { +void GraphOptimizer::FusePoolingAndFakeQuantize(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - if (node->getType() == Pooling) { + auto isSuitableParentNode = [](NodePtr node) { + if (node->getType() == Type::Pooling) { if (!one_of(node->getOriginalInputPrecisionAtPort(0), Precision::U8, Precision::I8)) return false; return node->getChildEdges().size() == 1 && node->getAlgorithm() == Algorithm::PoolingAvg; @@ -981,8 +998,8 @@ void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { return false; }; - auto isSuitableChildNode = [](MKLDNNNodePtr node) { - return node->getType() == FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; + auto isSuitableChildNode = [](NodePtr node) { + return node->getType() == Type::FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; }; for (int i = 0; i < graphNodes.size(); i++) { @@ 
-997,7 +1014,7 @@ void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { auto parents = child->parentEdges; for (size_t i = 0; i < parents.size(); i++) { auto p_edge = parents[i].lock(); - if (p_edge->getParent()->getType() == Pooling) + if (p_edge->getParent()->getType() == Type::Pooling) continue; graph.RemoveEdge(p_edge); @@ -1015,10 +1032,10 @@ void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { * @param child node we try to find * @return True if child is one of data supplier */ -static bool is_data_dependency(const std::shared_ptr &parent, - const std::shared_ptr &child) { - std::set visited; - std::list nextLayers {parent.get()}; +static bool is_data_dependency(const std::shared_ptr &parent, + const std::shared_ptr &child) { + std::set visited; + std::list nextLayers {parent.get()}; for (; !nextLayers.empty();) { auto layer = *nextLayers.begin(); @@ -1073,18 +1090,27 @@ static bool is_data_dependency(const std::shared_ptr &parent, * *** */ -void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(Graph &graph) { auto &graphNodes = graph.GetNodes(); - auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr child) { - return child->getType() == Eltwise && - one_of(child->getAlgorithm(), EltwiseRelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseSwish, EltwiseHswish, - EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero, EltwiseSoftRelu); + auto isFusingSupported = [&](NodePtr conv, NodePtr child) { + return child->getType() == Type::Eltwise && + one_of(child->getAlgorithm(), Algorithm::EltwiseRelu, + Algorithm::EltwiseElu, + Algorithm::EltwiseSigmoid, + Algorithm::EltwiseClamp, + Algorithm::EltwiseSwish, + Algorithm::EltwiseHswish, + Algorithm::EltwiseMish, + Algorithm::EltwiseHsigmoid, + Algorithm::EltwiseRoundHalfToEven, + Algorithm::EltwiseRoundHalfAwayFromZero, + Algorithm::EltwiseSoftRelu); }; for (auto &graphNode : graphNodes) { - const auto eltwiseNode = std::dynamic_pointer_cast(graphNode); - if (graphNode->getType() != Eltwise || graphNode->getAlgorithm() != EltwiseAdd || + const auto eltwiseNode = std::dynamic_pointer_cast(graphNode); + if (graphNode->getType() != Type::Eltwise || graphNode->getAlgorithm() != Algorithm::EltwiseAdd || !eltwiseNode || eltwiseNode->isWithBroadcast()) continue; @@ -1096,19 +1122,21 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG auto parent1 = graphNode->getParentEdgesAtPort(0)[0]->getParent(); auto parent2 = graphNode->getParentEdgesAtPort(1)[0]->getParent(); - bool isSuitableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution; - bool isSuitableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution; + bool isSuitableParent1 = parent1->getType() == Type::Convolution + || parent1->getType() == Type::BinaryConvolution; + bool isSuitableParent2 = parent2->getType() == Type::Convolution + || parent2->getType() == Type::BinaryConvolution; - auto canFuseSum = [](MKLDNNBinaryConvolutionNode *binConv, MKLDNNNodePtr fuseCandidate) { + auto canFuseSum = [](node::BinaryConvolution *binConv, NodePtr fuseCandidate) { if (binConv->getImplType() == impl_desc_type::ref) return false; - if (binConv->isFusedWith(FakeQuantize)) + if (binConv->isFusedWith(Type::FakeQuantize)) return false; - if (fuseCandidate->getAlgorithm() == EltwiseAdd) { + if 
(fuseCandidate->getAlgorithm() == Algorithm::EltwiseAdd) { for (auto& fusedNode : binConv->fusedWith) { - const auto eltwise = std::dynamic_pointer_cast(fusedNode); + const auto eltwise = std::dynamic_pointer_cast(fusedNode); if (eltwise && eltwise->isSpecialConvolutionAddFusing()) { return false; } @@ -1118,26 +1146,26 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG return false; }; - auto* binConvNode1 = dynamic_cast(parent1.get()); + auto* binConvNode1 = dynamic_cast(parent1.get()); if (binConvNode1) { isSuitableParent1 = isSuitableParent1 && canFuseSum(binConvNode1, graphNode); } - auto* binConvNode2 = dynamic_cast(parent2.get()); + auto* binConvNode2 = dynamic_cast(parent2.get()); if (binConvNode2) { isSuitableParent2 = isSuitableParent2 && canFuseSum(binConvNode2, graphNode); } - auto checkFusedWithSum = [](MKLDNNConvolutionNode* conv) -> bool { + auto checkFusedWithSum = [](Convolution* conv) -> bool { for (const auto& node : conv->getFusedWith()) { - const auto eltwise = std::dynamic_pointer_cast(node); + const auto eltwise = std::dynamic_pointer_cast(node); if (eltwise && eltwise->isSpecialConvolutionAddFusing()) return true; } return false; }; - auto* convNode1 = dynamic_cast(parent1.get()); + auto* convNode1 = dynamic_cast(parent1.get()); if (convNode1) { if (!convNode1->canBeExecutedInInt8()) { isSuitableParent1 = isSuitableParent1 && convNode1->getFusedWith().empty(); @@ -1146,7 +1174,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG } } - auto* convNode2 = dynamic_cast(parent2.get()); + auto* convNode2 = dynamic_cast(parent2.get()); if (convNode2) { if (!convNode2->canBeExecutedInInt8()) { isSuitableParent2 = isSuitableParent2 && convNode2->getFusedWith().empty(); @@ -1158,12 +1186,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG if (!isSuitableParent1 && !isSuitableParent2) continue; - std::shared_ptr mergedConv; - std::shared_ptr peerNode; + std::shared_ptr mergedConv; + std::shared_ptr peerNode; if (isSuitableParent1 && isSuitableParent2) { // not merged operation (peerNode) has to be in low precision - const auto isBranchQuantized = [](const MKLDNNNodePtr& branchParent) { + const auto isBranchQuantized = [](const NodePtr& branchParent) { const auto& fused = branchParent->getFusedWith(); const auto branchPrecision = fused.empty() ? branchParent->getOriginalOutputPrecisionAtPort(0) : @@ -1191,7 +1219,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG } if (isSuitableParent1 && isSuitableParent2) { - if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) && + if ((peerNode->getType() == Type::Convolution || peerNode->getType() == Type::BinaryConvolution) && mergedConv->getChildEdges().size() != 1) { mergedConv = parent2; peerNode = parent1; @@ -1234,7 +1262,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG lastNode->fuseInto(mergedConv); if (mergedConv->fusedWith.size() > 0 && - (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) { + (mergedConv->fusedWith[0]->getType() == Type::Convolution || mergedConv->fusedWith[0]->getType() == Type::BinaryConvolution)) { // Merged with DW_conv. 
Shape may change mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->getOutputShapeAtPort(0)); } else { @@ -1253,20 +1281,20 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG peerNode->getChildEdgeAt(childIdx)->drop(); int childPort = 1; - auto* mergedConvNode = dynamic_cast(mergedConv.get()); + auto* mergedConvNode = dynamic_cast(mergedConv.get()); if (mergedConvNode != nullptr) childPort = mergedConvNode->getParentEdges().size(); - auto* mergedBinConvNode = dynamic_cast(mergedConv.get()); + auto* mergedBinConvNode = dynamic_cast(mergedConv.get()); if (mergedBinConvNode != nullptr) childPort = mergedBinConvNode->getParentEdges().size(); - MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv, peer_port, childPort)); + EdgePtr edgePtr(new Edge(peerNode, mergedConv, peer_port, childPort)); graph.GetEdges().push_back(edgePtr); mergedConv->addEdge(edgePtr); - std::vector edges_to_reconnect = lastNode->getChildEdges(); + std::vector edges_to_reconnect = lastNode->getChildEdges(); for (auto &edge_w : edges_to_reconnect) { auto edge = edge_w.lock(); auto child = edge->getChild(); @@ -1278,7 +1306,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG edge->drop(); - MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child, idxParent, idxChild)); + EdgePtr newEdge(new Edge(mergedConv, child, idxParent, idxChild)); graph.GetEdges().push_back(newEdge); child->addEdge(newEdge); } @@ -1290,11 +1318,11 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG } } -void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseMVNAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return (node->getType() == MVN) && (node->getChildEdges().size() == 1); + auto isSuitableParentNode = [](NodePtr node) { + return (node->getType() == Type::MVN) && (node->getChildEdges().size() == 1); }; auto parent = graphNodes.begin(); @@ -1313,11 +1341,11 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == MVN) + if (p_edge->getParent()->getType() == Type::MVN) continue; graph.RemoveEdge(p_edge); @@ -1328,14 +1356,14 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseInterpolateAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Interpolate && node->getChildEdges().size() == 1; + auto isSuitableParentNode = [](NodePtr node) { + return node->getType() == Type::Interpolate && node->getChildEdges().size() == 1; }; - auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](NodePtr parentNode, NodePtr childNode) { // Avoid cycle dependencies for (auto &childParentEdge : childNode->getParentEdges()) { for (auto &parentParentEdge : parentNode->getParentEdges()) { @@ -1345,9 +1373,9 @@ void 
MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) } if (!childNode->getFusedWith().empty()) return false; - auto interpolateNode = dynamic_cast(parentNode.get()); + auto interpolateNode = dynamic_cast(parentNode.get()); if (!interpolateNode) { - IE_THROW() << "Cannot cast " << parentNode->getName() << " to MKLDNNInterpolateNode"; + IE_THROW() << "Cannot cast " << parentNode->getName() << " to Interpolate"; } return interpolateNode->canFuse(childNode); }; @@ -1368,11 +1396,11 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Interpolate) + if (p_edge->getParent()->getType() == Type::Interpolate) continue; graph.RemoveEdge(p_edge); @@ -1383,11 +1411,11 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) } } -void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseNormalizeL2AndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == NormalizeL2 && node->getChildEdges().size() == 1; + auto isSuitableParentNode = [](NodePtr node) { + return node->getType() == Type::NormalizeL2 && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); @@ -1406,11 +1434,11 @@ void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == NormalizeL2) + if (p_edge->getParent()->getType() == Type::NormalizeL2) continue; graph.RemoveEdge(p_edge); @@ -1421,11 +1449,11 @@ void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) } } -void MKLDNNGraphOptimizer::FuseReduceAndSimpleOperation(MKLDNNGraph &graph) { +void GraphOptimizer::FuseReduceAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Reduce && node->getChildEdges().size() == 1; + auto isSuitableParentNode = [](NodePtr node) { + return node->getType() == Type::Reduce && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); @@ -1444,13 +1472,13 @@ void MKLDNNGraphOptimizer::FuseReduceAndSimpleOperation(MKLDNNGraph &graph) { childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { + if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); if (p_edge == nullptr) IE_THROW() << "Cannot get parent edge " << childNode->getName(); - if (p_edge->getParent()->getType() == Reduce) + if (p_edge->getParent()->getType() == Type::Reduce) continue; graph.RemoveEdge(p_edge); @@ -1461,19 +1489,19 @@ void 
MKLDNNGraphOptimizer::FuseReduceAndSimpleOperation(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { +void GraphOptimizer::FuseEltwiseAndSimple(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Eltwise && node->getChildEdges().size() == 1; + auto isSuitableParentNode = [](NodePtr node) { + return node->getType() == Type::Eltwise && node->getChildEdges().size() == 1; }; - auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](NodePtr parentNode, NodePtr childNode) { if (parentNode->isConstant() && !childNode->isConstant()) return false; for (auto &childParentEdge : childNode->getParentEdges()) { // WA to prevent unsupported reorder exception issue in some cases - if (childParentEdge.lock()->getParent()->getType() == Split) { + if (childParentEdge.lock()->getParent()->getType() == Type::Split) { return false; } @@ -1512,18 +1540,18 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { childNode->fuseInto(parentNode); - if (childNode->getType() == FakeQuantize) { + if (childNode->getType() == Type::FakeQuantize) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Eltwise) + if (p_edge->getParent()->getType() == Type::Eltwise) continue; graph.RemoveEdge(p_edge); } graph.DropNode(childNode); - } else if (childNode->getType() == Eltwise) { + } else if (childNode->getType() == Type::Eltwise) { auto children = childNode->childEdges; auto parents = childNode->parentEdges; auto initialParentInNum = parentNode->getParentEdges().size(); @@ -1542,7 +1570,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { if (!child) continue; - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -1554,7 +1582,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { outNum = remEdge->getOutputNum(); graph.RemoveEdge(remEdge); } - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, child, inNum, outNum)); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); @@ -1562,19 +1590,19 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { parent->outputShapes[inNum] = child->inputShapes[outNum]; } } else { - MKLDNNEdgePtr &remEdge = p_edge; + EdgePtr &remEdge = p_edge; int inNum = 0; int outNum = parentNode->getParentEdges().size(); if (remEdge) { inNum = remEdge->getInputNum(); // Need to keep order for MulAdd - if (childNode->getAlgorithm() == EltwiseMulAdd) { + if (childNode->getAlgorithm() == Algorithm::EltwiseMulAdd) { outNum = initialParentInNum + remEdge->getOutputNum() - 1; } graph.RemoveEdge(remEdge); } - MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentNode, inNum, outNum)); + EdgePtr newEdge(new Edge(parent, parentNode, inNum, outNum)); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); parent->addEdge(newEdge); @@ -1590,24 +1618,24 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { - std::set processed; +void GraphOptimizer::DropDoubleReorders(Graph &graph) { + std::set processed; std::size_t graphNodesSize = graph.GetNodes().size(); for (std::size_t i = 0; i < 
graphNodesSize; i++) { - MKLDNNNodePtr& node = graph.GetNodes()[i]; - if (processed.find(node) == processed.end() && node->getType() == Reorder + NodePtr& node = graph.GetNodes()[i]; + if (processed.find(node) == processed.end() && node->getType() == Type::Reorder && node->getChildEdges().size() == 1 - && node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) { + && node->getChildEdgeAt(0)->getChild()->getType() == Type::Reorder ) { auto nextNode = node->getChildEdgeAt(0)->getChild(); - MKLDNNReorderNode* n = dynamic_cast(node.get()); + Reorder* n = dynamic_cast(node.get()); if (n == nullptr) IE_THROW() << "Cannot get reorder layer " << node->getName(); - MKLDNNReorderNode* nn = dynamic_cast(nextNode.get()); + Reorder* nn = dynamic_cast(nextNode.get()); if (nn == nullptr) IE_THROW() << "Cannot get reorder layer " << nextNode->getName(); - MKLDNNNodePtr p = n->getParentEdgesAtPort(0)[0]->getParent(); - MKLDNNNodePtr c = nn->getChildEdgesAtPort(0)[0]->getChild(); + NodePtr p = n->getParentEdgesAtPort(0)[0]->getParent(); + NodePtr c = nn->getChildEdgesAtPort(0)[0]->getChild(); auto oldEdgeNum = n->getParentEdgesAtPort(0)[0]->getInputNum(); @@ -1617,7 +1645,7 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { processed.insert(node); processed.insert(nextNode); - MKLDNNEdgePtr edge; + EdgePtr edge; for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) { if (cur->getChild() == c) edge = cur; @@ -1632,18 +1660,18 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { +void GraphOptimizer::FuseBroadcastAndEltwise(Graph &graph) { auto& graphNodes = graph.GetNodes(); for (auto &graphNode : graphNodes) { - if (graphNode->getType() != Generic + if (graphNode->getType() != Type::Generic || graphNode->getTypeStr() != "Broadcast" || graphNode->getChildEdges().size() != 1lu - || graphNode->getChildEdgeAt(0)->getChild()->getType() != Eltwise) + || graphNode->getChildEdgeAt(0)->getChild()->getType() != Type::Eltwise) continue; - MKLDNNNodePtr& broadcastNode = graphNode; - MKLDNNNodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); + NodePtr& broadcastNode = graphNode; + NodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); eltwiseNode->inputShapes[broadcastNode->getChildEdgeAt(0)->getOutputNum()] = broadcastNode->getInputShapeAtPort(0); @@ -1662,23 +1690,23 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { +void GraphOptimizer::FuseClampAndFakeQuantize(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableClampNode = [](MKLDNNNodePtr node) { - return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->getAlgorithm() == EltwiseClamp; + auto isSuitableClampNode = [](NodePtr node) { + return node->getType() == Type::Eltwise && node->getChildEdges().size() == 1 && node->getAlgorithm() == Algorithm::EltwiseClamp; }; - auto isSuitableFakeQuantizeNode = [](MKLDNNNodePtr node) { - return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; + auto isSuitableFakeQuantizeNode = [](NodePtr node) { + return node->getType() == Type::FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; }; - auto fuseClampAndFakeQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { - auto* eltwiseNode = dynamic_cast(parent.get()); + auto fuseClampAndFakeQuantizeNodes = [](NodePtr parent, NodePtr child) { + auto* 
eltwiseNode = dynamic_cast(parent.get()); if (eltwiseNode == nullptr) IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node"; - auto* fakeQuantizeNode = dynamic_cast(child.get()); + auto* fakeQuantizeNode = dynamic_cast(child.get()); if (fakeQuantizeNode == nullptr) IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; @@ -1711,14 +1739,14 @@ void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph &graph) { +void GraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto getNonConstPort = [](const MKLDNNNodePtr& node) { + auto getNonConstPort = [](const NodePtr& node) { std::vector nonConstPorts; for (size_t i = 0; i < node->getParentEdges().size(); i++) { const auto& parent = node->getParentEdgeAt(i)->getParent(); - if (!(parent->getType() == Input && parent->isConstant())) + if (!(parent->getType() == Type::Input && parent->isConstant())) nonConstPorts.push_back(i); } // there are more than 1 nonconst port or missed @@ -1728,35 +1756,39 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph return nonConstPorts[0]; }; - auto isSuitableScaleShiftNode = [getNonConstPort](const MKLDNNNodePtr& node) { - if (!one_of(node->getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd)) + auto isSuitableScaleShiftNode = [getNonConstPort](const NodePtr& node) { + if (!one_of(node->getAlgorithm(), Algorithm::EltwiseAdd, + Algorithm::EltwiseSubtract, + Algorithm::EltwiseMultiply, + Algorithm::EltwiseDivide, + Algorithm::EltwiseMulAdd)) return false; const auto nonConstPort = getNonConstPort(node); if (nonConstPort == -1) return false; - const MKLDNNNodePtr eltwiseInput = node->getParentEdgeAt(nonConstPort)->getParent(); + const NodePtr eltwiseInput = node->getParentEdgeAt(nonConstPort)->getParent(); return node->getChildEdges().size() == 1 && node->canBePerformedAsScaleShift(eltwiseInput.get()); }; - auto isSuitableFakeQuantizeNode = [](const MKLDNNNodePtr& node) { - return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; + auto isSuitableFakeQuantizeNode = [](const NodePtr& node) { + return node->getType() == Type::FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; }; - auto fuseScaleShiftAndFakeQuantizeNodes = [getNonConstPort](const MKLDNNNodePtr& parent, const MKLDNNNodePtr& child) { - auto fakeQuantizeNode = std::dynamic_pointer_cast(child); + auto fuseScaleShiftAndFakeQuantizeNodes = [getNonConstPort](const NodePtr& parent, const NodePtr& child) { + auto fakeQuantizeNode = std::dynamic_pointer_cast(child); if (fakeQuantizeNode == nullptr) IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; std::vector scalesBuffer; std::vector shiftsBuffer; - auto parentEltwise = std::dynamic_pointer_cast(parent); + auto parentEltwise = std::dynamic_pointer_cast(parent); if (!parentEltwise) { IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node"; } - const MKLDNNNodePtr eltwiseInput = parentEltwise->getParentEdgeAt(getNonConstPort(parent))->getParent(); + const NodePtr eltwiseInput = parentEltwise->getParentEdgeAt(getNonConstPort(parent))->getParent(); std::tie(scalesBuffer, shiftsBuffer) = parentEltwise->getScalesAndShifts(eltwiseInput.get()); const auto &outputShape = child->getOutputShapeAtPort(0); @@ -1874,17 +1906,17 @@ void 
MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph } } -void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { +void GraphOptimizer::MergeTransposeAndReorder(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](NodePtr node) { // WA: to avoid broken memory pointer for conv + sum - auto prevNodeIsConvSum = [](MKLDNNNodePtr node) -> bool { + auto prevNodeIsConvSum = [](NodePtr node) -> bool { const auto parent = node->getParentEdgesAtPort(0)[0]->getParent(); - if (parent->getType() == Convolution) { + if (parent->getType() == Type::Convolution) { for (const auto& fusedNode : parent->getFusedWith()) { - if (fusedNode->getAlgorithm() == EltwiseAdd) { - const auto addNode = std::dynamic_pointer_cast(fusedNode); + if (fusedNode->getAlgorithm() == Algorithm::EltwiseAdd) { + const auto addNode = std::dynamic_pointer_cast(fusedNode); if (addNode && addNode->isSpecialConvolutionAddFusing()) { return true; } @@ -1894,23 +1926,23 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { return false; }; - return node->getType() == Transpose + return node->getType() == Type::Transpose && node->getChildEdges().size() == 1 && !node->isDynamicNode() // TODO [DS]: enable for dynamic shapes when inPlace in the dynamic case is available (CVS-74863) && !prevNodeIsConvSum(node); }; - auto isSuitableChildNode = [](MKLDNNNodePtr node) { - return node->getType() == Reorder + auto isSuitableChildNode = [](NodePtr node) { + return node->getType() == Type::Reorder && node->getChildEdges().size() == 1 && !node->isDynamicNode(); // TODO [DS]: enable for dynamic shapes when inPlace in the dynamic case is available (CVS-74863) }; // Method checkAscendingSummaryOrder() checks that after the sequential execution of Transpose and Reorder nodes, // the order of the elements in the memory will not change. In other words, that Transpose+Reorder is identical permutation. - auto checkAscendingSummaryOrder = [](std::shared_ptr &parentNode, std::shared_ptr &childNode) -> bool { - auto* transposeNode = dynamic_cast(parentNode.get()); - auto* reorderNode = dynamic_cast(childNode.get()); + auto checkAscendingSummaryOrder = [](std::shared_ptr &parentNode, std::shared_ptr &childNode) -> bool { + auto* transposeNode = dynamic_cast(parentNode.get()); + auto* reorderNode = dynamic_cast(childNode.get()); if (!transposeNode || !reorderNode) { return false; } @@ -1979,7 +2011,7 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { // As in the first case, we also replace Transpose+Reorder pattern with a new Reorder. 
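    // (Illustrative aside: both cases rely on checkAscendingSummaryOrder() above, i.e. the
    // composed permutation of the Transpose and the Reorder must be the identity. A minimal
    // sketch of such a check is shown below; the helper and parameter names are illustrative
    // assumptions, not the plugin's actual API.)

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Compose the Transpose order with the Reorder's output order and verify the result is
    // ascending, i.e. the Transpose+Reorder pair leaves the element order in memory unchanged.
    static bool isIdenticalPermutation(const std::vector<size_t>& transposeOrder,
                                       const std::vector<size_t>& reorderOutOrder) {
        if (transposeOrder.size() != reorderOutOrder.size())
            return false;
        std::vector<size_t> summary(transposeOrder.size());
        for (size_t i = 0; i < transposeOrder.size(); ++i)
            summary[i] = transposeOrder[reorderOutOrder[i]];
        // A permutation sorted in ascending order is exactly the identity permutation.
        return std::is_sorted(summary.begin(), summary.end());
    }

    // Example: transposeOrder = {0, 3, 1, 2} and reorderOutOrder = {0, 2, 3, 1} compose to
    // {0, 1, 2, 3}, so such a Transpose+Reorder pair is an identical permutation and can be merged.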
// Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec) // to the output precision (outPrec) - auto mergeTransposeAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { + auto mergeTransposeAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { auto parentParentNode = parentNode->getParentEdgesAtPort(0)[0]->getParent(); auto parentParentConstNode = parentNode->getParentEdgesAtPort(1)[0]->getParent(); auto childChildNode = childNode->getChildEdgeAt(0)->getChild(); @@ -2008,9 +2040,9 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto reorderOutDesc = outDesc->cloneWithNewPrecision(inPrec); std::string reorderlayerName = parentParentNode->getName() + "_" + - MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; + Reorder::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; - MKLDNNEdgePtr edge; + EdgePtr edge; for (auto &childEdge : parentParentNode->getChildEdges()) { if (childEdge.lock()->getChild() == childChildNode) { edge = childEdge.lock(); @@ -2029,7 +2061,7 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto reorderOutDesc2 = outDesc; std::string reorderLayerName2 = reorderNode->getName() + "_" + - MKLDNNReorderNode::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); + Reorder::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, false); } @@ -2051,13 +2083,13 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) { +void GraphOptimizer::reshapeRnnSeq(Graph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - if (node->type != RNNSeq) + auto isSuitableParentNode = [](NodePtr node) { + if (node->type != Type::RNNSeq) return false; - auto rnnNode = std::dynamic_pointer_cast(node); + auto rnnNode = std::dynamic_pointer_cast(node); return rnnNode && (!rnnNode->hasNativeOrder() || node->isDynamicNode()) && node->outputShapes[0].getRank() == 4 && node->outputShapes[0].getDims()[1] == 1; }; @@ -2085,11 +2117,11 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) { parentNode->getOutputShapeAtPort(0).toPartialShape()), secondInput); unsqueeze->set_friendly_name(parentNode->getName() + "_abc_a1bc_" + std::to_string(j)); - const auto cpuUnsqueeze = std::make_shared(unsqueeze, graph.getEngine(), graph.weightsCache); + const auto cpuUnsqueeze = std::make_shared(unsqueeze, graph.getEngine(), graph.weightsCache); graph.InsertNode(parentNode, childNode, cpuUnsqueeze, edge->getInputNum(), edge->getOutputNum(), false); - const auto cpuConstant = std::make_shared(secondInput, graph.getEngine(), graph.weightsCache); - MKLDNNEdgePtr newEdge(new MKLDNNEdge(cpuConstant, cpuUnsqueeze, 0, 1)); + const auto cpuConstant = std::make_shared(secondInput, graph.getEngine(), graph.weightsCache); + EdgePtr newEdge(new Edge(cpuConstant, cpuUnsqueeze, 0, 1)); cpuUnsqueeze->addEdge(newEdge); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); @@ -2099,3 +2131,6 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) { } } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/graph_optimizer.h b/src/plugins/intel_cpu/src/graph_optimizer.h index 
5b5e5e0df42..f369dba4ff3 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.h +++ b/src/plugins/intel_cpu/src/graph_optimizer.h @@ -11,38 +11,38 @@ namespace ov { namespace intel_cpu { -class MKLDNNGraphOptimizer { +class GraphOptimizer { public: - MKLDNNGraphOptimizer(); + GraphOptimizer(); public: - void ApplyCommonGraphOptimizations(MKLDNNGraph& graph); - void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph); + void ApplyCommonGraphOptimizations(Graph& graph); + void ApplyImplSpecificGraphOptimizations(Graph& graph); private: - void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph); - void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph); - void FuseMultiplyAndAdd(MKLDNNGraph &graph); - void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph); - void FuseMatMulAndSimpleOperation(MKLDNNGraph &graph); - void FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph); - void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph); - void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph); - void FusePoolingAndFakeQuantize(MKLDNNGraph &graph); - void FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph); - void FuseMVNAndSimpleOperation(MKLDNNGraph &graph); - void FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph); - void FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph); - void FuseReduceAndSimpleOperation(MKLDNNGraph &graph); + void FuseConvolutionMatMulAndBias(Graph &graph); + void FuseDeconvolutionAndSimpleOperation(Graph &graph); + void FuseMultiplyAndAdd(Graph &graph); + void FuseFullyConnectedAndSimpleOperation(Graph &graph); + void FuseMatMulAndSimpleOperation(Graph &graph); + void FuseConvolutionAndSimpleOperationThroughMaxPool(Graph &graph); + void FuseConvolutionAndSimpleOperation(Graph &graph); + void FuseConvolutionAndDWConvolution(Graph &graph); + void FusePoolingAndFakeQuantize(Graph &graph); + void FuseConvolutionSumAndConvolutionSumActivation(Graph &graph); + void FuseMVNAndSimpleOperation(Graph &graph); + void FuseInterpolateAndSimpleOperation(Graph &graph); + void FuseNormalizeL2AndSimpleOperation(Graph &graph); + void FuseReduceAndSimpleOperation(Graph &graph); - void DropDoubleReorders(MKLDNNGraph& graph); - void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph); - void FuseBroadcastAndEltwise(MKLDNNGraph &graph); - void FuseEltwiseAndSimple(MKLDNNGraph &graph); - void FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph &graph); - void FuseClampAndFakeQuantize(MKLDNNGraph &graph); - void MergeTransposeAndReorder(MKLDNNGraph &graph); - void reshapeRnnSeq(MKLDNNGraph &graph); + void DropDoubleReorders(Graph& graph); + void FuseConvolutionAndZeroPoints(Graph &graph); + void FuseBroadcastAndEltwise(Graph &graph); + void FuseEltwiseAndSimple(Graph &graph); + void FusePerformedAsScaleShiftAndFakeQuantize(Graph &graph); + void FuseClampAndFakeQuantize(Graph &graph); + void MergeTransposeAndReorder(Graph &graph); + void reshapeRnnSeq(Graph &graph); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 5b7e2103ae8..fef83221c9c 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -3,7 +3,7 @@ // #include "infer_request.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include #include #include @@ -26,9 +26,12 @@ #include #include -void ov::intel_cpu::MKLDNNInferRequestBase::CreateInferRequest() { +namespace ov { +namespace intel_cpu { + +void 
InferRequestBase::CreateInferRequest() { auto id = (execNetwork->_numRequests)++; - profilingTask = openvino::itt::handle("MKLDNN_INFER_" + execNetwork->_name + "_" + std::to_string(id)); + profilingTask = openvino::itt::handle("INTEL_CPU_INFER_" + execNetwork->_name + "_" + std::to_string(id)); if (execNetwork->_graphs.size() == 0) IE_THROW() << "No graph was found"; @@ -40,10 +43,10 @@ void ov::intel_cpu::MKLDNNInferRequestBase::CreateInferRequest() { // of MemoryLayer implementation. It uses output edge of MemoryLayer // producer as storage for tensor to keep it between infer calls. for (auto& node : graph->GetNodes()) { - if (node->getType() == MemoryInput) { - auto memoryNode = dynamic_cast(node.get()); + if (node->getType() == Type::MemoryInput) { + auto memoryNode = dynamic_cast(node.get()); if (!memoryNode) { - IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode"; + IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; } auto state_store = memoryNode->getStore(); auto state_name = memoryNode->getId(); @@ -53,16 +56,16 @@ void ov::intel_cpu::MKLDNNInferRequestBase::CreateInferRequest() { if (suffix_idx != std::string::npos) state_name = state_name.substr(0, suffix_idx); - memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store)); + memoryStates.emplace_back(new VariableState(state_name, state_store)); } } } -ov::intel_cpu::MKLDNNInferRequestBase::~MKLDNNInferRequestBase() { +InferRequestBase::~InferRequestBase() { --(execNetwork->_numRequests); } -void ov::intel_cpu::MKLDNNInferRequestBase::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) { +void InferRequestBase::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision inPrec) { auto& tensorDesc = inputBlob->getTensorDesc(); bool needConvert = inPrec != tensorDesc.getPrecision(); @@ -89,12 +92,12 @@ void ov::intel_cpu::MKLDNNInferRequestBase::pushInput(const std::string& inputNa graph->PushInputData(inputName, needConvert ? 
iconv : inputBlob); } -void ov::intel_cpu::MKLDNNInferRequestBase::PushStates() { +void InferRequestBase::PushStates() { for (auto &node : graph->GetNodes()) { - if (node->getType() == MemoryInput) { - auto cur_node = dynamic_cast(node.get()); + if (node->getType() == Type::MemoryInput) { + auto cur_node = dynamic_cast(node.get()); if (!cur_node) { - IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode"; + IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; } auto cur_id = cur_node->getId(); for (const auto& state : memoryStates) { @@ -111,12 +114,12 @@ void ov::intel_cpu::MKLDNNInferRequestBase::PushStates() { } } -void ov::intel_cpu::MKLDNNInferRequestBase::PullStates() { +void InferRequestBase::PullStates() { for (auto &node : graph->GetNodes()) { - if (node->getType() == MemoryInput) { - auto cur_node = dynamic_cast(node.get()); + if (node->getType() == Type::MemoryInput) { + auto cur_node = dynamic_cast(node.get()); if (!cur_node) { - IE_THROW() << "Cannot cast " << node->getName() << " to MKLDNNMemoryInputNode"; + IE_THROW() << "Cannot cast " << node->getName() << " to MemoryInput"; } auto cur_id = cur_node->getId(); for (const auto& state : memoryStates) { @@ -133,7 +136,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::PullStates() { } } -void ov::intel_cpu::MKLDNNInferRequestBase::redefineMemoryForInputNodes() { +void InferRequestBase::redefineMemoryForInputNodes() { const auto cpuInputNodes = graph->GetInputNodesMap(); for (const auto &blob : _inputs) { @@ -146,7 +149,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::redefineMemoryForInputNodes() { } } -void ov::intel_cpu::MKLDNNInferRequestBase::InferImpl() { +void InferRequestBase::InferImpl() { using namespace openvino::itt; OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, profilingTask); auto graphLock = execNetwork->GetGraph(); @@ -184,7 +187,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::InferImpl() { graph->PullOutputData(_outputs); } -std::map ov::intel_cpu::MKLDNNInferRequestBase::GetPerformanceCounts() const { +std::map InferRequestBase::GetPerformanceCounts() const { if (!graph || !graph->IsReady()) IE_THROW() << "Graph is not ready!"; std::map perfMap; @@ -192,16 +195,16 @@ std::map ov::intel_cpu return perfMap; } -static inline void changeEdgePtr(const ov::intel_cpu::MKLDNNEdgePtr &edge, void *newPtr) { +static inline void changeEdgePtr(const EdgePtr &edge, void *newPtr) { edge->getMemoryPtr()->setDataHandle(newPtr); } -void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() { +void InferRequestBase::changeDefaultPtr() { for (auto& it : externalPtr) { const auto& inputNodesMap = graph->GetInputNodesMap(); auto input = inputNodesMap.find(it.first); if (input != inputNodesMap.end()) { - MKLDNNNodePtr inputNodePtr = input->second; + NodePtr inputNodePtr = input->second; if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == it.second) continue; auto& childEdges = inputNodePtr->getChildEdges(); @@ -219,8 +222,8 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() { break; } - if (child->getType() == Concatenation) { - auto concat = dynamic_cast(child.get()); + if (child->getType() == Type::Concatenation) { + auto concat = dynamic_cast(child.get()); if (concat && concat->isOptimized()) { canBeInPlace = false; break; @@ -228,7 +231,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() { } // Cannot be in-place before split because split is using different ptrs without offsets - if (child->getType() == Split) { + if (child->getType() == 
Type::Split) { canBeInPlace = false; break; } @@ -277,7 +280,7 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() { void* defaultPtr = parentEdge->getMemory().GetData(); // Cannot be in-place after concat because concat is using different ptrs without offsets auto parent = parentEdge->getParent(); - MKLDNNNodePtr previousParent; + NodePtr previousParent; do { previousParent = parent; if (parent->getChildEdges().size() != 1 || parent->isConstant() || parent->isInPlace()) { @@ -305,22 +308,22 @@ void ov::intel_cpu::MKLDNNInferRequestBase::changeDefaultPtr() { } } -std::vector ov::intel_cpu::MKLDNNInferRequestBase::QueryState() { +std::vector InferRequestBase::QueryState() { return memoryStates; } -void ov::intel_cpu::MKLDNNInferRequestBase::SetAsyncRequest(MKLDNNAsyncInferRequest* asyncRequest) { +void InferRequestBase::SetAsyncRequest(AsyncInferRequest* asyncRequest) { _asyncRequest = asyncRequest; } -void ov::intel_cpu::MKLDNNInferRequestBase::ThrowIfCanceled() const { +void InferRequestBase::ThrowIfCanceled() const { if (_asyncRequest != nullptr) { _asyncRequest->ThrowIfCanceled(); } } InferenceEngine::Precision -ov::intel_cpu::MKLDNNInferRequestBase::normToInputSupportedPrec(const std::pair& input) const { +InferRequestBase::normToInputSupportedPrec(const std::pair& input) const { const auto& inputTensorDesc = input.second->getTensorDesc(); auto inPrec = inputTensorDesc.getPrecision(); if (graph->hasMeanImageFor(input.first) && one_of(inPrec, InferenceEngine::Precision::U8, InferenceEngine::Precision::BOOL)) { @@ -336,24 +339,24 @@ ov::intel_cpu::MKLDNNInferRequestBase::normToInputSupportedPrec(const std::pair< return inPrec; } -/* ========================================== MKLDNNLegacyInferRequest ========================================== */ -ov::intel_cpu::MKLDNNLegacyInferRequest::MKLDNNLegacyInferRequest(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - std::shared_ptr execNetwork) -: MKLDNNInferRequestBase(networkInputs, networkOutputs, execNetwork) { +/* ========================================== LegacyInferRequest ========================================== */ +LegacyInferRequest::LegacyInferRequest(InferenceEngine::InputsDataMap networkInputs, + InferenceEngine::OutputsDataMap networkOutputs, + std::shared_ptr execNetwork) + : InferRequestBase(networkInputs, networkOutputs, execNetwork) { CreateInferRequest(); } -void ov::intel_cpu::MKLDNNLegacyInferRequest::initBlobs() { +void LegacyInferRequest::initBlobs() { for (const auto& it : _networkInputs) { - MKLDNNLegacyInferRequest::GetBlob(it.first); + LegacyInferRequest::GetBlob(it.first); } for (const auto& it : _networkOutputs) { - MKLDNNLegacyInferRequest::GetBlob(it.first); + LegacyInferRequest::GetBlob(it.first); } } -void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBatch(int new_batch) { +void LegacyInferRequest::SetBatch(int new_batch) { if (!graph->getProperty().enableDynamicBatch) IE_THROW() << "Dynamic batch is not enabled."; @@ -369,7 +372,7 @@ void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBatch(int new_batch) { } } -void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { +void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "SetBlobLegacy"); if (name.empty()) { IE_THROW(NotFound) << "Failed to set blob with empty name"; @@ -479,7 +482,7 @@ void ov::intel_cpu::MKLDNNLegacyInferRequest::SetBlob(const 
std::string& name, c } } -InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNLegacyInferRequest::GetBlob(const std::string& name) { +InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "GetBlobLegacy"); if (!graph || !graph->IsReady()) @@ -595,7 +598,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNLegacyInferRequest::GetBlob(cons return data; } -void ov::intel_cpu::MKLDNNLegacyInferRequest::PushInputData() { +void LegacyInferRequest::PushInputData() { for (auto input : _inputs) { auto inputName = input.first; if (!_networkInputs[inputName]) { @@ -613,11 +616,11 @@ void ov::intel_cpu::MKLDNNLegacyInferRequest::PushInputData() { } } -/* ========================================== MKLDNNInferRequest ========================================== */ -ov::intel_cpu::MKLDNNInferRequest::MKLDNNInferRequest(const std::vector>& inputs, - const std::vector>& outputs, - MKLDNNExecNetwork::Ptr execNetwork) -: MKLDNNInferRequestBase(inputs, outputs, execNetwork) { +/* ========================================== InferRequest ========================================== */ +InferRequest::InferRequest(const std::vector>& inputs, + const std::vector>& outputs, + ExecNetwork::Ptr execNetwork) +: InferRequestBase(inputs, outputs, execNetwork) { for (const std::shared_ptr& in : inputs) { modelInputsMap[ngraph::op::util::get_ie_output_name(ngraph::Output(in))] = in; } @@ -628,16 +631,16 @@ ov::intel_cpu::MKLDNNInferRequest::MKLDNNInferRequest(const std::vectorgetProperty().batchLimit || modelInputsMap.begin()->second->get_output_partial_shape(0).is_static()) { IE_THROW() << "Can't SetBatch for model that can't be executed via legacy dynamic batch or for static model"; } @@ -653,7 +656,7 @@ void ov::intel_cpu::MKLDNNInferRequest::SetBatch(int new_batch) { } } -void ov::intel_cpu::MKLDNNInferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { +void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "SetBlob"); if (name.empty()) { IE_THROW(NotFound) << "Failed to set blob with empty name"; @@ -751,7 +754,7 @@ void ov::intel_cpu::MKLDNNInferRequest::SetBlob(const std::string& name, const I } } -InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std::string& name) { +InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "GetBlob"); if (!graph || !graph->IsReady()) @@ -790,7 +793,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std: externalPtr[name] = _inputs[name]->buffer(); } } else { - IE_THROW() << "Blob with name: " << name << " exists in MKLDNN graph, but absents in network inputs"; + IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network inputs"; } } data = _inputs[name]; @@ -839,7 +842,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std: externalPtr[name] = data->buffer(); } } else { - IE_THROW() << "Blob with name: " << name << " exists in MKLDNN graph, but absents in network outputs"; + IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network outputs"; } } data = _outputs[name]; @@ -852,7 +855,7 @@ InferenceEngine::Blob::Ptr ov::intel_cpu::MKLDNNInferRequest::GetBlob(const std: return data; } -void ov::intel_cpu::MKLDNNInferRequest::PushInputData() { +void InferRequest::PushInputData() { for 
(auto input : _inputs) { auto inputName = input.first; if (!modelInputsMap[inputName]) { @@ -862,3 +865,6 @@ void ov::intel_cpu::MKLDNNInferRequest::PushInputData() { pushInput(inputName, input.second, normToInputSupportedPrec(input)); } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index 17f7c000339..5f64f2e5460 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -13,12 +13,12 @@ namespace ov { namespace intel_cpu { -class MKLDNNExecNetwork; -class MKLDNNAsyncInferRequest; +class ExecNetwork; +class AsyncInferRequest; -class MKLDNNInferRequestBase : public InferenceEngine::IInferRequestInternal { +class InferRequestBase : public InferenceEngine::IInferRequestInternal { public: - virtual ~MKLDNNInferRequestBase(); + virtual ~InferRequestBase(); void InferImpl() override; @@ -30,7 +30,7 @@ public: * @brief Sets the pointer to asynchronous inference request that holds this request * @param[in] asyncRequest Pointer to asynchronous inference request */ - void SetAsyncRequest(MKLDNNAsyncInferRequest* asyncRequest); + void SetAsyncRequest(AsyncInferRequest* asyncRequest); /** * @brief If `_asyncRequest` is initialized throw exception with `InferenceEngine::INFER_CANCELLED` status if inference request is canceled @@ -38,14 +38,14 @@ public: void ThrowIfCanceled() const; protected: - MKLDNNInferRequestBase(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - std::shared_ptr execNetwork_) + InferRequestBase(InferenceEngine::InputsDataMap networkInputs, + InferenceEngine::OutputsDataMap networkOutputs, + std::shared_ptr execNetwork_) : IInferRequestInternal(networkInputs, networkOutputs), execNetwork(execNetwork_) {} - MKLDNNInferRequestBase(const std::vector>& inputs, - const std::vector>& outputs, - std::shared_ptr execNetwork_) + InferRequestBase(const std::vector>& inputs, + const std::vector>& outputs, + std::shared_ptr execNetwork_) : IInferRequestInternal(inputs, outputs), execNetwork(execNetwork_) {} void CreateInferRequest(); @@ -55,7 +55,7 @@ protected: virtual void initBlobs() = 0; virtual void PushInputData() = 0; - MKLDNNGraph* graph = nullptr; + Graph* graph = nullptr; std::unordered_map externalPtr; private: @@ -64,34 +64,36 @@ private: void redefineMemoryForInputNodes(); void changeDefaultPtr(); - std::shared_ptr execNetwork; + std::shared_ptr execNetwork; openvino::itt::handle_t profilingTask; std::vector> memoryStates; - MKLDNNAsyncInferRequest* _asyncRequest = nullptr; + AsyncInferRequest* _asyncRequest = nullptr; }; -class MKLDNNLegacyInferRequest : public MKLDNNInferRequestBase { +class LegacyInferRequest : public InferRequestBase { public: - MKLDNNLegacyInferRequest(InferenceEngine::InputsDataMap networkInputs, - InferenceEngine::OutputsDataMap networkOutputs, - std::shared_ptr execNetwork); + LegacyInferRequest(InferenceEngine::InputsDataMap networkInputs, + InferenceEngine::OutputsDataMap networkOutputs, + std::shared_ptr execNetwork); void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; + private: void PushInputData() override; void initBlobs() override; void SetBatch(int batch = -1) override; }; -class MKLDNNInferRequest : public MKLDNNInferRequestBase { +class InferRequest : public InferRequestBase { public: - MKLDNNInferRequest(const std::vector>& inputs, - const 
std::vector>& outputs, - std::shared_ptr execNetwork); + InferRequest(const std::vector>& inputs, + const std::vector>& outputs, + std::shared_ptr execNetwork); void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override; InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override; + private: void PushInputData() override; void initBlobs() override; diff --git a/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp index 5cc5221217a..43e1a5e1f63 100644 --- a/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/blocked_memory_desc.cpp @@ -5,7 +5,8 @@ #include "blocked_memory_desc.h" #include "utils/general_utils.h" -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { bool BlockedMemoryDesc::isCompatibleInternal(const BlockedMemoryDesc &rhs, CmpMask cmpMask) const { if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) @@ -66,3 +67,6 @@ std::string BlockedMemoryDesc::serializeFormat() const { return result.str(); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp index 02a1f7a82d6..20608d8cac6 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp @@ -6,7 +6,8 @@ #include #include "dnnl_blocked_memory_desc.h" -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { static VectorDims makeRange(size_t size) { VectorDims retVec(size, 0); @@ -308,3 +309,6 @@ MemoryDescPtr CpuBlockedMemoryDesc::cloneWithNewPrecision(const InferenceEngine: newDesc->setPrecision(prec); return newDesc; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc.h index 7eb2a95998c..b7925688f56 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc.h +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc.h @@ -22,6 +22,9 @@ namespace ov { namespace intel_cpu { +namespace node { +class Split; +} // namespace node class MemoryDesc; @@ -173,7 +176,7 @@ protected: friend class BlobDumper; // WA: optimizedNspc2Ncsp used getElementOffset inside implementation - friend class MKLDNNSplitNode; + friend class node::Split; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp index 296524f5412..b6b5a7bc302 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp @@ -15,7 +15,6 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; namespace ov { @@ -90,7 +89,7 @@ BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDes } } -InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const Memory &mem) { // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor auto& memDesc = mem.getDesc(); InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h 
b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h index aabd227571d..be34b0c09db 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h @@ -18,7 +18,7 @@ class DnnlMemoryDesc; class BlockedMemoryDesc; class DnnlBlockedMemoryDesc; class CpuBlockedMemoryDesc; -class MKLDNNMemory; +class Memory; class MemoryDescUtils { public: @@ -60,11 +60,11 @@ public: static std::shared_ptr convertToBlockedMemoryDesc(const std::shared_ptr &desc); /** - * @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse - * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob + * @brief Creates InferenceEngine::Blob from Memory with the memory reuse + * @param desc Memory from which will be created InferenceEngine::Blob * @return pointer to InferenceEngine::Blob */ - static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); + static InferenceEngine::Blob::Ptr interpretAsBlob(const Memory& mem); /** * @brief Converts MemoryDesc to InferenceEngine::TensorDesc diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp index d1107f55e16..01f08b11c6a 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.cpp @@ -6,9 +6,11 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { + DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) { const auto ndims = shape.getRank(); @@ -18,9 +20,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con if (shape.hasZeroDims() && std::any_of(strides.begin(), strides.end(), [](size_t stride) { return stride != 0; } )) { IE_THROW() << "Can't create DnnlBlockedMemoryDesc with zero dim, but with non zero strides"; } - desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), - MKLDNNExtensionUtils::IEPrecisionToDataType(prc), - MKLDNNExtensionUtils::convertToDnnlDims(strides)}; + desc = {DnnlExtensionUtils::convertToDnnlDims(dims), + DnnlExtensionUtils::IEPrecisionToDataType(prc), + DnnlExtensionUtils::convertToDnnlDims(strides)}; } else { mkldnn::memory::dims plain_strides; if (shape.hasZeroDims()) { @@ -34,7 +36,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con } } - desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), MKLDNNExtensionUtils::IEPrecisionToDataType(prc), plain_strides}; + desc = {DnnlExtensionUtils::convertToDnnlDims(dims), DnnlExtensionUtils::IEPrecisionToDataType(prc), plain_strides}; } order.resize(ndims); @@ -69,13 +71,13 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con // scalar case if (shape.getRank() == 0) { desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.data_type = memory::convert_to_c(DnnlExtensionUtils::IEPrecisionToDataType(prc)); desc.data.ndims = 1; desc.data.dims[0] = 1; desc.data.padded_dims[0] = 1; desc.data.format_desc.blocking.strides[0] = 1; desc.data.padded_offsets[0] = 0; - desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding); + desc.data.offset0 = DnnlExtensionUtils::convertToDnnlDim(offsetPadding); return; } @@ -99,7 +101,7 @@ 
DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined or zero blockedDims."; } - auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims()); + auto dims = DnnlExtensionUtils::convertToDnnlDims(shape.getDims()); size_t outer_ndims = dims.size(); @@ -141,9 +143,9 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con // Fill general memory desc fields desc.data.format_kind = dnnl_blocked; desc.data.extra.flags = 0; - desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.data_type = memory::convert_to_c(DnnlExtensionUtils::IEPrecisionToDataType(prc)); desc.data.ndims = dims.size(); - desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding); + desc.data.offset0 = DnnlExtensionUtils::convertToDnnlDim(offsetPadding); std::copy(dims.begin(), dims.end(), desc.data.dims); if (!offsetPaddingToData.empty()) { @@ -152,14 +154,14 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con if (!inner_pad_offsets_is_zero) IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, inner pad offsets is not zero: " << vec2str(offsetPaddingToData); - auto dnnlPaddedOffsets = MKLDNNExtensionUtils::convertToDnnlDims(offsetPaddingToData); + auto dnnlPaddedOffsets = DnnlExtensionUtils::convertToDnnlDims(offsetPaddingToData); std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.data.padded_offsets); } else { std::fill(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + outer_ndims, 0); } std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); - auto dnnlBlkDims = MKLDNNExtensionUtils::convertToDnnlDims(blockedDims); + auto dnnlBlkDims = DnnlExtensionUtils::convertToDnnlDims(blockedDims); for (size_t i = 0; i < order.size(); i++) { auto idx = order[i]; @@ -184,7 +186,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, con this->recomputeDefaultStrides(); } else { for (size_t i = 0; i < outer_ndims; i++) { - auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides); + auto dnnlStrides = DnnlExtensionUtils::convertToDnnlDims(strides); dnn_blk_desc.strides[order[i]] = dnnlStrides[i]; } initStrides(); @@ -201,7 +203,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory: if (format == memory::format_tag::x && shape.getRank() == 0) { desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); } else { - desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dims), dataType, format); + desc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(dims), dataType, format); } VectorDims perm; @@ -313,7 +315,7 @@ static VectorDims extractOrder(const mkldnn::memory::desc& desc) { } DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc) : - MemoryDesc(MKLDNNExtensionUtils::convertToVectorDims(mdesc.dims()), DnnlBlocked) { + MemoryDesc(DnnlExtensionUtils::convertToVectorDims(mdesc.dims()), DnnlBlocked) { desc = mdesc; if (desc.data.format_kind == dnnl::impl::format_kind::any) IE_THROW(Unexpected) << "Memory format any is prohibited!"; @@ -400,7 +402,7 @@ bool DnnlBlockedMemoryDesc::isTailCFormat() const { static mkldnn::memory::desc cloneDescWithNewDims(const mkldnn::memory::desc& desc, const VectorDims& dims, const VectorDims& order) { using namespace dnnl::impl::utils; - auto mklDims = 
MKLDNNExtensionUtils::convertToDnnlDims(dims); + auto mklDims = DnnlExtensionUtils::convertToDnnlDims(dims); const auto offsetPadding = desc.data.offset0; mkldnn::memory::desc newMklDesc = desc; array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size()); @@ -562,7 +564,7 @@ void DnnlBlockedMemoryDesc::initBlockDims() { } // blocked dims // [dims via new_outer_order with auto pad] U [inner_blk_dims] - VectorDims outer_block_dims = MKLDNNExtensionUtils::convertToVectorDims(dims); + VectorDims outer_block_dims = DnnlExtensionUtils::convertToVectorDims(dims); for (size_t i = 0; i < outer_block_dims.size(); i++) { if (outer_block_dims[i] != Shape::UNDEFINED_DIM) { outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); @@ -650,7 +652,7 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc, if (!descWrapped.is_blocking_desc()) IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc"; - if (!shape.isCompatible(MKLDNNExtensionUtils::convertToVectorDims(mdesc.dims()))) { + if (!shape.isCompatible(DnnlExtensionUtils::convertToVectorDims(mdesc.dims()))) { IE_THROW(ParameterMismatch) << "Can not create DnnlBlockedMemoryDesc. memory::desc dims: " << vec2str(mdesc.dims()) << " are incompatible with provided shape: " << shape.toString() << "."; } @@ -670,3 +672,6 @@ DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc, std::string DnnlBlockedMemoryDesc::serializeFormat() const { return BlockedMemoryDesc::serializeFormat(); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.h index 5c649f5dafb..00584ceffcc 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.h +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_blocked_memory_desc.h @@ -6,7 +6,7 @@ #include "blocked_memory_desc.h" #include -#include +#include namespace ov { namespace intel_cpu { @@ -40,7 +40,7 @@ public: } size_t getOffsetPadding() const override { - return MKLDNNExtensionUtils::convertToDim(desc.data.offset0); + return DnnlExtensionUtils::convertToDim(desc.data.offset0); } const VectorDims& getStrides() const override { @@ -93,8 +93,8 @@ private: void recomputeDefaultStrides(); - friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); - friend std::shared_ptr MKLDNNExtensionUtils::makeUndefinedDesc(const mkldnn::memory::desc &desc, const Shape& shape); + friend DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); + friend std::shared_ptr DnnlExtensionUtils::makeUndefinedDesc(const mkldnn::memory::desc &desc, const Shape& shape); friend class MemoryDescUtils; }; diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp index 6d8986f68ed..2a2a2f84700 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp @@ -3,7 +3,7 @@ // #include "dnnl_memory_desc.h" -#include +#include #include #include "mkldnn/ie_mkldnn.h" @@ -11,7 +11,7 @@ namespace ov { namespace intel_cpu { DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) : - MemoryDesc(Shape(MKLDNNExtensionUtils::convertToVectorDims(desc.dims())), Mkldnn), desc(desc) { + MemoryDesc(Shape(DnnlExtensionUtils::convertToVectorDims(desc.dims())), Mkldnn), desc(desc) { if (desc.data.format_kind == 
dnnl::impl::format_kind::any) IE_THROW(Unexpected) << "Memory format any is prohibited!"; } @@ -21,7 +21,7 @@ bool DnnlMemoryDesc::canComputeMemSizeZeroDims() const { } size_t DnnlMemoryDesc::getCurrentMemSizeImp() const { - return MKLDNNExtensionUtils::getMemSizeForDnnlDesc(desc); + return DnnlExtensionUtils::getMemSizeForDnnlDesc(desc); } size_t DnnlMemoryDesc::getElementOffset(size_t elemNumber) const { @@ -62,7 +62,7 @@ bool DnnlMemoryDesc::isDefinedImp() const { } InferenceEngine::Precision DnnlMemoryDesc::getPrecision() const { - return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); + return DnnlExtensionUtils::DataTypeToIEPrecision(desc.data_type()); } MemoryDescPtr DnnlMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h index a1a95f8a90e..85693128ff3 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h @@ -5,7 +5,7 @@ #pragma once #include "cpu_blocked_memory_desc.h" -#include +#include namespace ov { namespace intel_cpu { @@ -56,7 +56,7 @@ protected: mkldnn::memory::desc desc; void setPrecision(InferenceEngine::Precision prc) override { - desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.data_type = static_cast(DnnlExtensionUtils::IEPrecisionToDataType(prc)); } private: @@ -69,7 +69,7 @@ private: bool isDefinedImp() const override; MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override; - friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); + friend DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/memory_state.cpp b/src/plugins/intel_cpu/src/memory_state.cpp index e1ee67138da..9ca882061c3 100644 --- a/src/plugins/intel_cpu/src/memory_state.cpp +++ b/src/plugins/intel_cpu/src/memory_state.cpp @@ -3,7 +3,7 @@ // #include "memory_state.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include "blob_factory.hpp" using namespace InferenceEngine; @@ -11,7 +11,7 @@ using namespace InferenceEngine; namespace ov { namespace intel_cpu { -void MKLDNNVariableState::Reset() { +void VariableState::Reset() { std::memset(state->buffer(), 0, state->byteSize()); } diff --git a/src/plugins/intel_cpu/src/memory_state.h b/src/plugins/intel_cpu/src/memory_state.h index 855126a1fb7..4f42a8ac51b 100644 --- a/src/plugins/intel_cpu/src/memory_state.h +++ b/src/plugins/intel_cpu/src/memory_state.h @@ -15,10 +15,10 @@ namespace ov { namespace intel_cpu { -class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal { +class VariableState : public InferenceEngine::IVariableStateInternal { public: - MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) : - InferenceEngine::IVariableStateInternal{name} { + VariableState(std::string name, MemoryPtr storage) + : InferenceEngine::IVariableStateInternal{name} { state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->getDesc())); state->allocate(); cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); diff --git a/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.cpp b/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.cpp index 91409fc2056..ff86544296f 100644 --- a/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.cpp +++ 
b/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.cpp @@ -4,9 +4,10 @@ #include "iml_type_mapper.h" -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { -impl_desc_type ov::intel_cpu::parse_impl_name(std::string impl_desc_name) { +impl_desc_type parse_impl_name(std::string impl_desc_name) { impl_desc_type res = impl_desc_type::unknown; #define REPLACE_WORD(_wrd, _sub) auto pos = impl_desc_name.find(#_wrd); \ @@ -55,7 +56,7 @@ impl_desc_type ov::intel_cpu::parse_impl_name(std::string impl_desc_name) { return res; } -const char* ov::intel_cpu::impl_type_to_string(impl_desc_type type) { +const char* impl_type_to_string(impl_desc_type type) { #define CASE(_type) do { \ if (type == _type) return #_type; \ } while (0) @@ -111,3 +112,6 @@ const char* ov::intel_cpu::impl_type_to_string(impl_desc_type type) { #undef CASE return "unknown"; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.h b/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.h index 18f81cddb8f..9ae0518fd45 100644 --- a/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.h +++ b/src/plugins/intel_cpu/src/mkldnn/iml_type_mapper.h @@ -97,4 +97,3 @@ impl_desc_type parse_impl_name(std::string impl_desc_name); } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 15945a2ecff..0d355a62a5d 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -46,7 +46,7 @@ #include "nodes/shuffle_channels.h" #include "nodes/reference.h" #include "nodes/fake_quantize.h" -#include "extension_utils.h" +#include "dnnl_extension_utils.h" #include "mkldnn/iml_type_mapper.h" #include "nodes/common/cpu_memcpy.h" @@ -63,17 +63,20 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace openvino; +using namespace ov::intel_cpu::node; using namespace InferenceEngine::details; -MKLDNNNode::NodesFactory & MKLDNNNode::factory() { +namespace ov { +namespace intel_cpu { + +Node::NodesFactory & Node::factory() { static NodesFactory factoryInstance; return factoryInstance; } -MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) +Node::Node(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache) : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()), type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { @@ -138,7 +141,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en } } - std::string inputMemoryFormats = ngraph::getMKLDNNInputMemoryFormats(op); + std::string inputMemoryFormats = getInputMemoryFormats(op); if (!inputMemoryFormats.empty()) { std::istringstream stream(inputMemoryFormats); std::string str; @@ -149,7 +152,7 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en } } - std::string outputMemoryFormats = ngraph::getMKLDNNOutputMemoryFormats(op); + std::string outputMemoryFormats = getOutputMemoryFormats(op); if (!outputMemoryFormats.empty()) { std::istringstream stream(outputMemoryFormats); std::string str; @@ -166,14 +169,14 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en } } -MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, 
MKLDNNWeightsSharing::Ptr &w_cache) +Node::Node(const std::string& type, const std::string& name, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache) : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), weightCache(w_cache), engine(eng), fusingPort(-1), name(name), typeStr(type), type(TypeFromName(type)), profiling(name) { // TODO [NM]: What about filling inDims and outDims? } -void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) { +void Node::addEdge(const EdgeWeakPtr& edge) { auto edgePtr = edge.lock(); if (!edgePtr) return; @@ -186,7 +189,7 @@ void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) { childPtr->parentEdges.push_back(edge); } -void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) { +void Node::removeEdge(const EdgeWeakPtr& edge) { auto edgePtr = edge.lock(); if (!edgePtr) return; @@ -210,7 +213,7 @@ void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) { } } -void MKLDNNNode::remove() { +void Node::remove() { auto parent_edges = parentEdges; for (const auto &parentEdge : parent_edges) { removeEdge(parentEdge); @@ -221,7 +224,7 @@ void MKLDNNNode::remove() { } } -bool MKLDNNNode::isEdgesEmpty(const std::vector& edges) const { +bool Node::isEdgesEmpty(const std::vector& edges) const { for (auto &edge : edges) { if (edge.lock()) return false; @@ -229,7 +232,7 @@ bool MKLDNNNode::isEdgesEmpty(const std::vector& edges) const return true; } -void MKLDNNNode::createPrimitive() { +void Node::createPrimitive() { if (inputShapesDefined() && isExecutable()) { if (needPrepareParams()) { prepareParams(); @@ -238,11 +241,11 @@ void MKLDNNNode::createPrimitive() { } } -void MKLDNNNode::selectOptimalPrimitiveDescriptor() { +void Node::selectOptimalPrimitiveDescriptor() { selectPreferPrimitiveDescriptor(getPrimitivesPriority(), false); } -void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs) { +void Node::selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs) { for (auto& type : priority) { int selectedPrimitive = -1; int equalsFormatCount = -1; @@ -295,7 +298,7 @@ void MKLDNNNode::selectPreferPrimitiveDescriptor(const std::vectorgetParent()->getType() == Reshape) { + if (getParentEdges().size() == 1 && getParentEdgeAt(0)->getParent()->getType() == Type::Reshape) { auto reshapeNode = getParentEdgeAt(0)->getParent(); if (reshapeNode->getParentEdgeAt(0)->getParent()->getChildEdges().size() != 1) return false; @@ -321,37 +324,37 @@ bool MKLDNNNode::canBeInPlace() const { return true; } -void MKLDNNNode::resolveInPlaceEdges() { +void Node::resolveInPlaceEdges() { const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { auto parentEdge = getParentEdgeAt(i); - if (parentEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace() < 0) + if (parentEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace() < 0) continue; auto memMgr = parentEdge->getMemory().getDnnlMemoryMngr(); - parentEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); + parentEdge->getMemoryPtr().reset(new Memory(getEngine())); parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr); - parentEdge->changeStatus(MKLDNNEdge::Status::Allocated); + 
parentEdge->changeStatus(Edge::Status::Allocated); } for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { auto childEdge = getChildEdgeAt(i); - if (childEdge->getStatus() != MKLDNNEdge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace() < 0) + if (childEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace() < 0) continue; auto memMgr = childEdge->getMemory().getDnnlMemoryMngr(); - childEdge->getMemoryPtr().reset(new MKLDNNMemory(getEngine())); + childEdge->getMemoryPtr().reset(new Memory(getEngine())); childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr); - childEdge->changeStatus(MKLDNNEdge::Status::Allocated); + childEdge->changeStatus(Edge::Status::Allocated); } } -MemoryDescPtr MKLDNNNode::getBaseMemDescAtInputPort(size_t portNum) const { +MemoryDescPtr Node::getBaseMemDescAtInputPort(size_t portNum) const { if (auto primDesc = getSelectedPrimitiveDescriptor()) { const auto& inConfs = primDesc->getConfig().inConfs; if (inConfs.size() < portNum) { @@ -362,7 +365,7 @@ MemoryDescPtr MKLDNNNode::getBaseMemDescAtInputPort(size_t portNum) const { IE_THROW() << "Can't get input memory desc, primitive descriptor is not selected"; } -MemoryDescPtr MKLDNNNode::getBaseMemDescAtOutputPort(size_t portNum) const { +MemoryDescPtr Node::getBaseMemDescAtOutputPort(size_t portNum) const { if (auto primDesc = getSelectedPrimitiveDescriptor()) { const auto& outConfs = primDesc->getConfig().outConfs; if (outConfs.size() < portNum) { @@ -373,7 +376,7 @@ MemoryDescPtr MKLDNNNode::getBaseMemDescAtOutputPort(size_t portNum) const { IE_THROW() << "Can't get output memory desc, primitive descriptor is not selected"; } -std::string MKLDNNNode::getPrimitiveDescriptorType() { +std::string Node::getPrimitiveDescriptorType() { auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor(); impl_desc_type type = impl_desc_type::undef; @@ -442,7 +445,7 @@ std::string MKLDNNNode::getPrimitiveDescriptorType() { return str_type; } -const MKLDNNEdgePtr MKLDNNNode::getParentEdgeAt(size_t idx) const { +const EdgePtr Node::getParentEdgeAt(size_t idx) const { if (idx >= parentEdges.size()) IE_THROW() << "Node " << getName() << " contains less parent edges than " << idx; auto parentEdgePtr = parentEdges[idx].lock(); @@ -451,7 +454,7 @@ const MKLDNNEdgePtr MKLDNNNode::getParentEdgeAt(size_t idx) const { return parentEdgePtr; } -const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const { +const EdgePtr Node::getChildEdgeAt(size_t idx) const { if (idx >= childEdges.size()) IE_THROW() << "Node " << getName() << " contains less child edges than " << idx; auto childEdgePtr = childEdges[idx].lock(); @@ -460,11 +463,11 @@ const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const { return childEdgePtr; } -const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) const { +const std::vector Node::getParentEdgesAtPort(size_t idx) const { if (idx >= inputShapes.size()) IE_THROW() << "Node " << getName() << " contains less input ports than " << idx; - std::vector res; + std::vector res; for (auto &edge_w : parentEdges) { auto edge = edge_w.lock(); if (!edge) @@ -474,11 +477,11 @@ const std::vector MKLDNNNode::getParentEdgesAtPort(size_t idx) co return res; } -const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) const { +const std::vector Node::getChildEdgesAtPort(size_t idx) const { if (idx >= outputShapes.size()) IE_THROW() << "Node " << getName() << " contains less 
output ports than " << idx; - std::vector res; + std::vector res; for (auto &edge_w : childEdges) { auto edge = edge_w.lock(); if (!edge) @@ -489,7 +492,7 @@ const std::vector MKLDNNNode::getChildEdgesAtPort(size_t idx) con } -std::vector MKLDNNNode::getAvailableFormatsForDims(const Shape &dims) const { +std::vector Node::getAvailableFormatsForDims(const Shape &dims) const { if (dims.getRank() == 0) return {memory::format_tag::x}; else if (dims.getRank() == 1) @@ -506,13 +509,13 @@ std::vector MKLDNNNode::getAvailableFormatsForDims(const Sha return {memory::format_tag::any}; } -void MKLDNNNode::execute(mkldnn::stream strm) { +void Node::execute(mkldnn::stream strm) { if (prim) { (*prim).execute(strm, primArgs); } } -void MKLDNNNode::executeDynamic(mkldnn::stream strm) { +void Node::executeDynamic(mkldnn::stream strm) { if (needShapeInfer()) { redefineOutputMemory(shapeInfer()); } @@ -527,7 +530,7 @@ void MKLDNNNode::executeDynamic(mkldnn::stream strm) { updateLastInputDims(); } -void MKLDNNNode::redefineOutputMemory(const std::vector &newOutputShapes) { +void Node::redefineOutputMemory(const std::vector &newOutputShapes) { if (newOutputShapes.size() != outputShapes.size()) { IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName(); } @@ -551,7 +554,7 @@ void MKLDNNNode::redefineOutputMemory(const std::vector &newOutputSh } } -void MKLDNNNode::initSupportedPrimitiveDescriptors() { +void Node::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -602,11 +605,11 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNNode::filterSupportedPrimitiveDescriptors() { +void Node::filterSupportedPrimitiveDescriptors() { // Compare by format tag auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool { auto fmt_tdesc = DnnlBlockedMemoryDesc(desc.getShape(), - MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + DnnlExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), fmt); return desc.isCompatible(fmt_tdesc); }; @@ -636,7 +639,7 @@ void MKLDNNNode::filterSupportedPrimitiveDescriptors() { } } -void MKLDNNNode::initDescriptor(const NodeConfig& config) { +void Node::initDescriptor(const NodeConfig& config) { if (!getSelectedPrimitiveDescriptor()) { return; } @@ -717,7 +720,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) { selectedPD->setConfig(rightConfig); } -void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) { +void Node::prepareMemory(mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -742,17 +745,17 @@ void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) { // TODO [DS]: internal blobs should be removed or rewritten using Memory object auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc()); - MKLDNNMemory memory{ engine }; + Memory memory{ engine }; memory.Create(newDesc, internalBlob->buffer()); - MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine)); + MemoryPtr _ptr = MemoryPtr(new Memory(engine)); _ptr->Create(*intDescs[i]); _ptr->SetData(memory); return _ptr; }; - MKLDNNMemoryPtr ptr; + MemoryPtr ptr; if (weightCache != nullptr) { const uint64_t data_hash = weightCache->GetHashFunc().hash( internalBlob->buffer(), internalBlob->byteSize()); @@ -770,7 +773,7 @@ void 
MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) { } } -bool MKLDNNNode::isInPlace() { +bool Node::isInPlace() { if (inplace == InPlaceType::Unknown) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) @@ -795,9 +798,9 @@ bool MKLDNNNode::isInPlace() { return inplace == InPlaceType::InPlace; } -bool MKLDNNNode::isConstant() { +bool Node::isConstant() { if (constant == ConstantType::Unknown) { - std::vector checkNodes; + std::vector checkNodes; for (size_t i = 0; i < getChildEdges().size(); i++) { checkNodes.push_back(getChildEdgeAt(i)->getChild()); } @@ -822,7 +825,7 @@ bool MKLDNNNode::isConstant() { return constant == ConstantType::Const; } -MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector& checkNodes) { +Node::ConstantType Node::checkConstant(LOOK look, std::vector& checkNodes) { if (constant == ConstantType::Unknown) { if (look == LOOK_DOWN) { for (size_t i = 0; i < getChildEdges().size(); i++) { @@ -839,7 +842,7 @@ MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector& MKLDNNNode::getPrimitivesPriority() { +const std::vector& Node::getPrimitivesPriority() { std::vector priorities = { impl_desc_type::unknown, impl_desc_type::brgconv_avx512_amx_1x1, @@ -903,7 +906,7 @@ const std::vector& MKLDNNNode::getPrimitivesPriority() { return implPriorities; } -PortDescBasePtr MKLDNNNode::getConsistentInputDesc(const NodeConfig &config, size_t idx) const { +PortDescBasePtr Node::getConsistentInputDesc(const NodeConfig &config, size_t idx) const { int num = getParentEdgeAt(idx)->getInputNum(); auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor(); if (!selectedPD) @@ -937,7 +940,7 @@ PortDescBasePtr MKLDNNNode::getConsistentInputDesc(const NodeConfig &config, siz return config.inConfs[idx].getPortDesc(); } -PortDescBasePtr MKLDNNNode::getConsistentOutputDesc(const NodeConfig &config, size_t idx) const { +PortDescBasePtr Node::getConsistentOutputDesc(const NodeConfig &config, size_t idx) const { int num = getChildEdgeAt(idx)->getOutputNum(); auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor(); if (!selectedPD) @@ -971,7 +974,7 @@ PortDescBasePtr MKLDNNNode::getConsistentOutputDesc(const NodeConfig &config, si return config.outConfs[idx].getPortDesc(); } -void MKLDNNNode::initOptimalPrimitiveDescriptor() { +void Node::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; @@ -996,12 +999,12 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { config.outConfs[i].setMemDesc(outPortDesc->getMemDesc()); } } - if (getType() != RNNSeq && getType() != RNNCell) { + if (getType() != Type::RNNSeq && getType() != Type::RNNCell) { initDescriptor(config); } } -bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { +bool Node::isConfigDefined(const NodeConfig &config) const { for (const auto& configs : {config.inConfs, config.outConfs}) { for (const auto &dc : configs) { if (!dc.getMemDesc()->isDefined()) @@ -1011,26 +1014,26 @@ bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { return true; } -MemoryDescPtr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +MemoryDescPtr Node::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (getInputShapeAtPort(idx).isDynamic()) { - return 
MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.src_desc(idx), getInputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.src_desc(idx), getInputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx)); } -MemoryDescPtr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +MemoryDescPtr Node::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (getOutputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(idx), getOutputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(idx), getOutputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx)); } -int MKLDNNNode::batchToProcess() const { +int Node::batchToProcess() const { return dynBatchLim == 0 ? getMaxBatch() : std::min(getMaxBatch(), dynBatchLim); } // TODO [DS]: how we should process this for dynamic shape? -size_t MKLDNNNode::getMaxBatch() const { +size_t Node::getMaxBatch() const { // FIXME: batch != 0 dims number if (!inputShapes.empty()) { if (inputShapes[0].getRank()) @@ -1047,7 +1050,7 @@ size_t MKLDNNNode::getMaxBatch() const { return 0; } -void MKLDNNNode::setDynamicBatchLim(int lim) { +void Node::setDynamicBatchLim(int lim) { dynBatchLim = lim; auto setDynamicBatch = [this](int argType, int newBatch) { @@ -1071,9 +1074,9 @@ void MKLDNNNode::setDynamicBatchLim(int lim) { } } -void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr, +void Node::appendPostOpArgs(const mkldnn::primitive_attr& attr, std::unordered_map& primArgs, - const std::vector& postOpsArgs) { + const std::vector& postOpsArgs) { constexpr size_t maxPrimArgsCapacity = 32; auto post_ops = attr.get_post_ops(); int idx = 0; @@ -1099,7 +1102,7 @@ void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr, } } -bool MKLDNNNode::isFusedWith(Type fusedNodeType) const { +bool Node::isFusedWith(Type fusedNodeType) const { for (auto fusedNode : fusedWith) { if (fusedNode->type == fusedNodeType) return true; @@ -1108,7 +1111,7 @@ bool MKLDNNNode::isFusedWith(Type fusedNodeType) const { return false; } -InferenceEngine::Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) { +InferenceEngine::Layout Node::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) { switch (dims.size()) { case 0: return InferenceEngine::Layout::SCALAR; @@ -1129,41 +1132,41 @@ InferenceEngine::Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool } } -void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void Node::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { IE_THROW() << "Fusing of " << NameFromType(this->getType()) << " operation is not implemented"; } -void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void Node::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { IE_THROW() << "Fusing of " << NameFromType(this->getType()) << " operation is not implemented"; } -void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector& binaryShape, std::vector& binaryPostOpsMem) { +void 
Node::appendBinPostOps(mkldnn::post_ops& ops, const std::vector& binaryShape, std::vector& binaryPostOpsMem) { IE_THROW() << "Binary fusing of " << NameFromType(this->getType()) << " operation is not implemented"; } -std::vector MKLDNNNode::getInputPrecisions() const { +std::vector Node::getInputPrecisions() const { std::vector inputPrecisions; for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); - if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) { - inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); } } return inputPrecisions; } -std::vector MKLDNNNode::getOutputPrecisions() const { +std::vector Node::getOutputPrecisions() const { std::vector outputPrecisions; for (size_t i = 0; i < getChildEdges().size(); i++) { auto childEdge = getChildEdgeAt(i); - if (childEdge && childEdge->getStatus() == MKLDNNEdge::Status::Validated) { - outputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->GetDataType()))); + if (childEdge && childEdge->getStatus() == Edge::Status::Validated) { + outputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((childEdge->getMemoryPtr()->GetDataType()))); } } return outputPrecisions; } -InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const { +InferenceEngine::Precision Node::getRuntimePrecision() const { // Base implementation consider precision only on data path and // assumes it is placed on 0-th port (which is true for almost all layers) InferenceEngine::Precision runtimePrecision = Precision::UNSPECIFIED; @@ -1180,8 +1183,8 @@ InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const { return runtimePrecision; } -MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr& op, const mkldnn::engine& eng, - const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { +Node* Node::NodesFactory::create(const std::shared_ptr& op, const mkldnn::engine& eng, + const ExtensionManager::Ptr& extMgr, WeightsSharing::Ptr &w_cache) { // getExceptionDescWithoutStatus removes redundant information from the exception message. For instance, the NotImplemented // exception is generated in the form: full_path_to_src_file:line_number [ NOT_IMPLEMENTED ] reason. 
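// (in other words, the boilerplate ahead of the reason text is the redundant part that gets stripped, so only the reason is reported)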
// An example for gather node: @@ -1200,17 +1203,17 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr } return desc; }; - MKLDNNNode *newNode = nullptr; + Node *newNode = nullptr; std::string errorMessage; { - std::unique_ptr ol(createNodeIfRegistered(intel_cpu, Generic, op, eng, w_cache)); + std::unique_ptr ol(createNodeIfRegistered(intel_cpu, Type::Generic, op, eng, w_cache)); if (ol != nullptr && ol->created(extMgr)) newNode = ol.release(); } if (newNode == nullptr) { try { - std::unique_ptr ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, eng, w_cache)); + std::unique_ptr ol(createNodeIfRegistered(intel_cpu, TypeFromName(op->get_type_name()), op, eng, w_cache)); if (ol != nullptr && ol->created(extMgr)) newNode = ol.release(); } catch (const InferenceEngine::Exception& ex) { @@ -1224,7 +1227,7 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr if (newNode == nullptr) { try { - std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage)); + std::unique_ptr ol(new Reference(op, eng, w_cache, errorMessage)); if (ol != nullptr && ol->created(extMgr)) newNode = ol.release(); } catch (const InferenceEngine::Exception& ex) { @@ -1241,11 +1244,11 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr // WA-start : TI node requires all attributes to construct internal subgpath // including extManager, socket and mkldnn::eng. if (newNode) { - if (newNode->getType() == TensorIterator) { - if (auto ti = dynamic_cast(newNode)) + if (newNode->getType() == Type::TensorIterator) { + if (auto ti = dynamic_cast(newNode)) ti->setExtManager(extMgr); - } else if (newNode->getType() == If) { - if (auto ifNode = dynamic_cast(newNode)) + } else if (newNode->getType() == Type::If) { + if (auto ifNode = dynamic_cast(newNode)) ifNode->setExtManager(extMgr); } } @@ -1262,14 +1265,14 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr return newNode; } -bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const { +bool Node::canBePerformedAsScaleShift(const Node *parentNode) const { IE_ASSERT(parentNode); size_t fusingPort = 0; const size_t channelAxis = parentNode->getFusingAxis(); for (size_t i = 0; i < getParentEdges().size(); i++) { - MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get(); + Node *node = getParentEdgesAtPort(i)[0]->getParent().get(); if (node == nullptr) { IE_THROW() << "Cannot get parent node for " << getName() << " on " << i << " port"; } @@ -1277,7 +1280,7 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const fusingPort = i; continue; } - if (node->getType() != Input || !node->isConstant()) { + if (node->getType() != Type::Input || !node->isConstant()) { return false; } } @@ -1296,54 +1299,59 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const }; const auto isConvertablePowerStatic = [&]() { - if (getAlgorithm() == EltwisePowerStatic) { - const auto eltwise = dynamic_cast(this); + if (getAlgorithm() == Algorithm::EltwisePowerStatic) { + const auto eltwise = dynamic_cast(this); if (!eltwise) { - IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode"; + IE_THROW() << "Cannot cast " << getName() << " to Eltwise"; } return eltwise->getAlpha() == 1.0f; } return false; }; - return (one_of(getAlgorithm(), EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd) && isBroadcastableToDataInput()) + return (one_of(getAlgorithm(), Algorithm::EltwiseAdd, + 
Algorithm::EltwiseMultiply, + Algorithm::EltwiseSubtract, + Algorithm::EltwiseDivide, + Algorithm::EltwisePrelu, + Algorithm::EltwiseMulAdd) && isBroadcastableToDataInput()) || isConvertablePowerStatic(); } // @todo shifts for Subtract and scales for Divide are replaced with // Add (with opposite sign) and Multiply (with inverse value) for legacy dephwise post ops // This can be avoided after dephwise post ops are gone -std::pair, std::vector> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const { +std::pair, std::vector> Node::getScalesAndShifts(const Node *parentNode) const { std::vector scales, shifts; - const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector& buffer) { - auto *constInputNode = dynamic_cast(constInput.get()); + const auto fillValuesFrom = [&](const NodePtr& constInput, std::vector& buffer) { + auto *constInputNode = dynamic_cast(constInput.get()); if (!constInputNode) { - IE_THROW() << "Cannot cast " << constInput->getName() << " to MKLDNNInputNode"; + IE_THROW() << "Cannot cast " << constInput->getName() << " to Input"; } auto constBlob = constInputNode->getMemoryPtr(); const auto elementsCount = constBlob->GetDescWithType()->getPaddedElementsCount(); buffer.resize(elementsCount); cpu_convert(constBlob->GetPtr(), &buffer[0], - MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), Precision::FP32, elementsCount); }; const auto constPort = getParentEdgesAtPort(0)[0]->getParent().get() == parentNode ? 1 : 0; - if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) { + if (one_of(getAlgorithm(), Algorithm::EltwiseMultiply, Algorithm::EltwiseDivide, Algorithm::EltwisePrelu)) { fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), scales); - } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) { + } else if (one_of(getAlgorithm(), Algorithm::EltwiseAdd, Algorithm::EltwiseSubtract)) { fillValuesFrom(getParentEdgesAtPort(constPort)[0]->getParent(), shifts); - } else if (one_of(getAlgorithm(), EltwiseMulAdd)) { + } else if (one_of(getAlgorithm(), Algorithm::EltwiseMulAdd)) { fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales); fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts); - } else if (one_of(getAlgorithm(), EltwisePowerStatic)) { - const auto power = dynamic_cast(this); + } else if (one_of(getAlgorithm(), Algorithm::EltwisePowerStatic)) { + const auto power = dynamic_cast(this); if (!power) { - IE_THROW() << "Cannot cast " << getName() << " to MKLDNNEltwiseNode"; + IE_THROW() << "Cannot cast " << getName() << " to Eltwise"; } scales.push_back(power->getBeta()); shifts.push_back(power->getGamma()); @@ -1352,20 +1360,20 @@ std::pair, std::vector> MKLDNNNode::getScalesAndShifts } switch (getAlgorithm()) { - case EltwiseAdd: { + case Algorithm::EltwiseAdd: { scales.resize(shifts.size(), 1.0f); break; } - case EltwiseSubtract: { + case Algorithm::EltwiseSubtract: { scales.resize(shifts.size(), 1.0f); std::transform(shifts.begin(), shifts.end(), shifts.begin(), [](float shift){ return -1.0f * shift; }); break; } - case EltwiseMultiply: { + case Algorithm::EltwiseMultiply: { shifts.resize(scales.size(), 0.0f); break; } - case EltwiseDivide: { + case Algorithm::EltwiseDivide: { shifts.resize(scales.size(), 0.0f); std::transform(scales.begin(), scales.end(), scales.begin(), [](float scale){ return 1.0f / scale; }); break; @@ -1376,21 +1384,21 @@ std::pair, std::vector> 
MKLDNNNode::getScalesAndShifts return {scales, shifts}; } -bool MKLDNNNode::isInputTensorAtPortEmpty(size_t port) const { +bool Node::isInputTensorAtPortEmpty(size_t port) const { if (inputShapes.size() <= port) { IE_THROW() << "Incorrect input port number for node " << getName(); } return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims(); } -bool MKLDNNNode::isOutputTensorAtPortEmpty(size_t port) const { +bool Node::isOutputTensorAtPortEmpty(size_t port) const { if (outputShapes.size() <= port) { IE_THROW() << "Incorrect output port number for node " << getName(); } return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims(); } -bool MKLDNNNode::hasEmptyInputTensors() const { +bool Node::hasEmptyInputTensors() const { for (size_t i = 0; i < getParentEdges().size(); i++) { if (isInputTensorAtPortEmpty(i)) return true; @@ -1398,7 +1406,7 @@ bool MKLDNNNode::hasEmptyInputTensors() const { return false; } -bool MKLDNNNode::hasEmptyOutputTensors() const { +bool Node::hasEmptyOutputTensors() const { for (size_t i = 0; i < outputShapes.size(); i++) { if (isOutputTensorAtPortEmpty(i)) return true; @@ -1406,7 +1414,7 @@ bool MKLDNNNode::hasEmptyOutputTensors() const { return false; } -bool MKLDNNNode::inputShapesDefined() const { +bool Node::inputShapesDefined() const { for (size_t i = 0; i < getParentEdges().size(); i++) { if (!getParentEdgesAtPort(i)[0]->getMemory().getDesc().isDefined()) { return false; @@ -1415,7 +1423,7 @@ bool MKLDNNNode::inputShapesDefined() const { return true; } -bool MKLDNNNode::outputShapesDefined() const { +bool Node::outputShapesDefined() const { for (size_t i = 0; i < outputShapes.size(); i++) { if (!getChildEdgesAtPort(i)[0]->getMemory().getDesc().isDefined()) { return false; @@ -1424,15 +1432,15 @@ bool MKLDNNNode::outputShapesDefined() const { return true; } -bool MKLDNNNode::shapesDefined() const { +bool Node::shapesDefined() const { return inputShapesDefined() && outputShapesDefined(); } -bool MKLDNNNode::needPrepareParams() const { +bool Node::needPrepareParams() const { return inputShapesModified(); } -bool MKLDNNNode::inputShapesModified() const { +bool Node::inputShapesModified() const { if (lastInputDims.size() != getParentEdges().size()) { if (lastInputDims.empty()) return true; @@ -1446,16 +1454,16 @@ bool MKLDNNNode::inputShapesModified() const { return false; } -bool MKLDNNNode::needShapeInfer() const { +bool Node::needShapeInfer() const { return inputShapesModified(); } -std::vector MKLDNNNode::shapeInfer() const { +std::vector Node::shapeInfer() const { return shapeInferGeneric(); } -std::vector MKLDNNNode::shapeInferGeneric(const std::vector& input_shapes, - uint32_t input_value_port_mask) const { +std::vector Node::shapeInferGeneric(const std::vector& input_shapes, + uint32_t input_value_port_mask) const { // collect input values std::map> input_values; if (input_value_port_mask) { @@ -1480,19 +1488,19 @@ std::vector MKLDNNNode::shapeInferGeneric(const std::vector output_shapes = shapeInference->infer(input_shapes, input_values); + std::vector output_shapes = shapeInference->infer(input_shapes, input_values); std::vector result(output_shapes.size()); - std::transform(output_shapes.begin(), output_shapes.end(), result.begin(), [](const ov::StaticShape& s) { + std::transform(output_shapes.begin(), output_shapes.end(), result.begin(), [](const StaticShape& s) { return s.to_shape(); }); return result; } -std::vector MKLDNNNode::shapeInferGeneric(const std::vector& shapes, +std::vector Node::shapeInferGeneric(const 
std::vector& shapes, uint32_t input_value_port_mask) const { - std::vector input_shapes; + std::vector input_shapes; input_shapes.reserve(shapes.size()); for (size_t i = 0; i < shapes.size(); i++) @@ -1501,8 +1509,8 @@ std::vector MKLDNNNode::shapeInferGeneric(const std::vector& return shapeInferGeneric(input_shapes, input_value_port_mask); } -std::vector MKLDNNNode::shapeInferGeneric(uint32_t input_value_port_mask) const { - std::vector input_shapes; +std::vector Node::shapeInferGeneric(uint32_t input_value_port_mask) const { + std::vector input_shapes; const auto & iranks = shapeInference->get_input_ranks(); input_shapes.reserve(iranks.size()); @@ -1518,7 +1526,7 @@ std::vector MKLDNNNode::shapeInferGeneric(uint32_t input_value_port_ return shapeInferGeneric(input_shapes, input_value_port_mask); } -void MKLDNNNode::updateLastInputDims() { +void Node::updateLastInputDims() { if (lastInputDims.size() != getParentEdges().size()) { if (!lastInputDims.empty()) IE_THROW() << "Input dims and parent edges number mismatch!"; @@ -1529,23 +1537,38 @@ void MKLDNNNode::updateLastInputDims() { lastInputDims[i] = getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(); } -bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const { - if (node->getType() == FakeQuantize) { - bool ret = node->getAlgorithm() != FQBinarization; +bool Node::canFuseSimpleOperation(const NodePtr& node) const { + if (node->getType() == Type::FakeQuantize) { + bool ret = node->getAlgorithm() != Algorithm::FQBinarization; for (size_t i = 1; i < node->getParentEdges().size(); i++) { ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; } return ret; - } else if (node->getType() == Eltwise) { + } else if (node->getType() == Type::Eltwise) { return one_of(node->getAlgorithm(), - EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, - EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, - EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) || + Algorithm::EltwiseRelu, + Algorithm::EltwiseGelu, + Algorithm::EltwiseElu, + Algorithm::EltwiseSigmoid, + Algorithm::EltwiseClamp, + Algorithm::EltwiseTanh, + Algorithm::EltwiseSwish, + Algorithm::EltwiseHswish, + Algorithm::EltwiseMish, + Algorithm::EltwiseHsigmoid, + Algorithm::EltwiseRoundHalfToEven, + Algorithm::EltwiseRoundHalfAwayFromZero, + Algorithm::EltwiseAbs, + Algorithm::EltwiseSqrt, + Algorithm::EltwiseSoftRelu) || node->canBePerformedAsScaleShift(this); } return false; } -void MKLDNNNode::addFusedNode(const MKLDNNNodePtr &fusingNode) { +void Node::addFusedNode(const NodePtr &fusingNode) { fusedWith.push_back(fusingNode); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 8b4747cb415..51f4079c841 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -13,7 +13,7 @@ #include #include "cpu_memory.h" #include "edge.h" -#include "descriptor.h" +#include "dnnl_descriptor.h" #include "selective_build.h" #include "mkldnn/iml_type_mapper.h" #include "extension_mngr.h" @@ -37,9 +37,9 @@ namespace ov { namespace intel_cpu { -using MKLDNNNodePtr = std::shared_ptr; -using MKLDNNNodeConstPtr = std::shared_ptr; -using MKLDNNNodeWeakPtr = std::weak_ptr; +using NodePtr = std::shared_ptr; +using NodeConstPtr = std::shared_ptr; +using NodeWeakPtr = std::weak_ptr; class PortConfigurator { public: @@ -94,10 +94,10 @@ private: impl_desc_type implementationType; }; 
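The node.h hunks below rename the core node class and its shared-pointer aliases (MKLDNNNodePtr becomes NodePtr, and so on). As a quick orientation aid, here is a minimal sketch of what a call site looks like after the rename; the helper is illustrative only, not part of this patch, and it assumes the plugin's node.h is available on the include path:

#include <iostream>
#include <vector>
#include "node.h"  // provides ov::intel_cpu::Node and the NodePtr alias after this patch

// Hypothetical helper: walk a list of graph nodes and print their names.
// Only getName() is used here, which the renamed Node class keeps from the former MKLDNNNode API.
void dumpNodeNames(const std::vector<ov::intel_cpu::NodePtr>& nodes) {
    for (const auto& node : nodes)
        std::cout << node->getName() << std::endl;
}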
-class MKLDNNNode { +class Node { public: - MKLDNNNode(const MKLDNNNode &) = delete; - MKLDNNNode & operator = (const MKLDNNNode &) = delete; + Node(const Node &) = delete; + Node & operator = (const Node &) = delete; using AttrPtr = std::shared_ptr; @@ -108,12 +108,12 @@ public: struct PerfCounters { PerfCounters(std::string const& name) : execute(openvino::itt::handle(name)) - , getSupportedDescriptors(openvino::itt::handle>("MKLDNNNode::getSupportedDescriptors")) - , initSupportedPrimitiveDescriptors(openvino::itt::handle>("MKLDNNNode::initSupportedPrimitiveDescriptors")) - , filterSupportedPrimitiveDescriptors(openvino::itt::handle>("MKLDNNNode::filterSupportedPrimitiveDescriptors")) - , selectOptimalPrimitiveDescriptor(openvino::itt::handle>("MKLDNNNode::selectOptimalPrimitiveDescriptor")) - , createPrimitive(openvino::itt::handle>("MKLDNNNode::createPrimitive")) - , initOptimalPrimitiveDescriptor(openvino::itt::handle>("MKLDNNNode::initOptimalPrimitiveDescriptor")) + , getSupportedDescriptors(openvino::itt::handle>("Node::getSupportedDescriptors")) + , initSupportedPrimitiveDescriptors(openvino::itt::handle>("Node::initSupportedPrimitiveDescriptors")) + , filterSupportedPrimitiveDescriptors(openvino::itt::handle>("Node::filterSupportedPrimitiveDescriptors")) + , selectOptimalPrimitiveDescriptor(openvino::itt::handle>("Node::selectOptimalPrimitiveDescriptor")) + , createPrimitive(openvino::itt::handle>("Node::createPrimitive")) + , initOptimalPrimitiveDescriptor(openvino::itt::handle>("Node::initOptimalPrimitiveDescriptor")) {} template @@ -138,27 +138,27 @@ public: class NodesFactory; static NodesFactory & factory(); - virtual ~MKLDNNNode() = default; + virtual ~Node() = default; - void addEdge(const MKLDNNEdgeWeakPtr& edge); - void removeEdge(const MKLDNNEdgeWeakPtr& edge); + void addEdge(const EdgeWeakPtr& edge); + void removeEdge(const EdgeWeakPtr& edge); virtual void cleanup(); void remove(); - const std::vector &getParentEdges() const noexcept { + const std::vector &getParentEdges() const noexcept { return parentEdges; } - const std::vector &getChildEdges() const noexcept { + const std::vector &getChildEdges() const noexcept { return childEdges; } - const MKLDNNEdgePtr getParentEdgeAt(size_t idx) const; - virtual const MKLDNNEdgePtr getChildEdgeAt(size_t idx) const; + const EdgePtr getParentEdgeAt(size_t idx) const; + virtual const EdgePtr getChildEdgeAt(size_t idx) const; - const std::vector getParentEdgesAtPort(size_t idx) const; - const std::vector getChildEdgesAtPort(size_t idx) const; + const std::vector getParentEdgesAtPort(size_t idx) const; + const std::vector getChildEdgesAtPort(size_t idx) const; bool isDropped() { return (isEdgesEmpty(childEdges) && isEdgesEmpty(parentEdges)); @@ -170,7 +170,7 @@ public: bool isInPlace(); - // must be called only after MKLDNNGraph::InitEdges() + // must be called only after Graph::InitEdges() virtual bool isExecutable() const { return !hasEmptyInputTensors(); } @@ -183,13 +183,13 @@ public: static void appendPostOpArgs(const mkldnn::primitive_attr& attr, std::unordered_map& primArgs, - const std::vector& postOpsArgs); + const std::vector& postOpsArgs); bool isFusedWith(Type type) const; - virtual void addFusedNode(const MKLDNNNodePtr &fusingNode); + virtual void addFusedNode(const NodePtr &fusingNode); - virtual void fuseInto(MKLDNNNodePtr& parentNode) { + virtual void fuseInto(NodePtr& parentNode) { // The graph supports fusing only of consecutive nodes and some graph logic requires to know through which input port a node was fused 
into parent one. for (int i = 0; i < getParentEdges().size(); i++) { if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) { @@ -220,15 +220,15 @@ public: fusedWith.clear(); } - void mergeWith(const MKLDNNNodePtr &merge) { + void mergeWith(const NodePtr &merge) { mergedWith.push_back(merge); } - const std::vector &getMergeWith() { + const std::vector &getMergeWith() { return mergedWith; } - const std::vector &getFusedWith() { + const std::vector &getFusedWith() { return fusedWith; } @@ -317,7 +317,7 @@ public: selectedPrimitiveDescriptorIndex = index; // Each primitive descriptor has its own InPlace status. So after new primitive descriptor selection - // we should reset InPlace type to definite new status for node using MKLDNNNode::isInPlace() + // we should reset InPlace type to definite new status for node using Node::isInPlace() inplace = InPlaceType::Unknown; } @@ -352,7 +352,7 @@ public: const std::vector& outputDesc) {} virtual void initDescriptor(const NodeConfig& config); virtual bool created() const = 0; - virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) { + virtual bool created(const ExtensionManager::Ptr& extMgr) { return created(); } @@ -422,11 +422,11 @@ public: this->typeStr = typeStr; } - virtual size_t descInputNumbers(MKLDNNDescriptor desc) { + virtual size_t descInputNumbers(DnnlDesriptor desc) { return desc.inputNumbers(); } - virtual size_t descOutputNumbers(MKLDNNDescriptor desc) { + virtual size_t descOutputNumbers(DnnlDesriptor desc) { return desc.outputNumbers(); } @@ -515,7 +515,7 @@ public: algorithm = alg; } - virtual bool canFuse(const MKLDNNNodePtr& node) const { + virtual bool canFuse(const NodePtr& node) const { return false; } @@ -523,7 +523,7 @@ public: isInQuantizedGraph = flag; } - bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const; + bool canBePerformedAsScaleShift(const Node *parentNode = nullptr) const; bool isDynamicNode() const { return isDynamic; @@ -555,24 +555,24 @@ public: * node from which data comes * @return pair of scales and shifts */ - std::pair, std::vector> getScalesAndShifts(const MKLDNNNode *parentNode) const; + std::pair, std::vector> getScalesAndShifts(const Node *parentNode) const; /** * @brief Appends new item into ops list with the information on how the node should be executed as post operation. * Seed node should call this routine and pass its post operations list as parameter. 
* @param ops List of fused post operations */ - virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& postOpsMem); + virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& postOpsMem); virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& postOpsMem); - virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem); + virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem); void setRuntimeCache(MultiCachePtr cache) { rtParamsCache = cache; } protected: - bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const; + bool canFuseSimpleOperation(const NodePtr& node) const; void setType(Type type) { this->type = type; @@ -595,8 +595,8 @@ protected: std::vector inputShapes; std::vector outputShapes; - std::vector fusedWith; - std::vector mergedWith; + std::vector fusedWith; + std::vector mergedWith; std::vector implPriorities; std::vector inputMemoryFormatsFilter; std::vector outputMemoryFormatsFilter; @@ -604,8 +604,8 @@ protected: std::string originalLayers; // contains names of the original layers separated by comma - MKLDNNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); - MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); + Node(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache); + Node(const std::string& type, const std::string& name, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache); int selectedPrimitiveDescriptorIndex = -1; bool permanent = false; @@ -624,22 +624,22 @@ protected: InPlaceType inplace = InPlaceType::Unknown; ConstantType constant = ConstantType::Unknown; std::vector internalBlobs; - std::vector internalBlobMemory; + std::vector internalBlobMemory; std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; - std::vector postOpsArgs; - MKLDNNPrimitive prim; - std::vector descs; + std::vector postOpsArgs; + Primitive prim; + std::vector descs; - MKLDNNWeightsSharing::Ptr weightCache; + WeightsSharing::Ptr weightCache; Algorithm algorithm = Algorithm::Default; bool isInQuantizedGraph = false; - friend class MKLDNNEdge; - friend class MKLDNNGraph; - friend class MKLDNNGraphOptimizer; + friend class Edge; + friend class Graph; + friend class GraphOptimizer; void selectPreferPrimitiveDescriptor(const std::vector& priority, bool ignoreConstInputs); bool isConfigDefined(const NodeConfig &config) const; @@ -745,8 +745,8 @@ protected: std::shared_ptr shapeInference; private: - std::vector parentEdges; - std::vector childEdges; + std::vector parentEdges; + std::vector childEdges; std::vector originalInputPrecisions; std::vector originalOutputPrecisions; @@ -767,11 +767,11 @@ private: MultiCachePtr rtParamsCache; - bool isEdgesEmpty(const std::vector& edges) const; + bool isEdgesEmpty(const std::vector& edges) const; template typename std::enable_if::value, PD>::type - createPd(MKLDNNDescriptor desc) { + createPd(DnnlDesriptor desc) { std::shared_ptr selected_desc_ptr = desc; std::shared_ptr backward_prim_desc_ptr = desc; return PD(*selected_desc_ptr, engine, *backward_prim_desc_ptr); @@ -779,15 +779,15 @@ private: template typename std::enable_if::value, PD>::type - createPd(MKLDNNDescriptor desc) { + createPd(DnnlDesriptor desc) { std::shared_ptr selected_desc_ptr = desc; return 
PD(*selected_desc_ptr, engine); } enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; - ConstantType checkConstant(LOOK look, std::vector& checkNodes); + ConstantType checkConstant(LOOK look, std::vector& checkNodes); - std::vector shapeInferGeneric(const std::vector& input_shapes, + std::vector shapeInferGeneric(const std::vector& input_shapes, uint32_t input_value_port_mask) const; #ifdef CPU_DEBUG_CAPS @@ -804,26 +804,24 @@ constexpr uint64_t PortMask(int n, T... rest) { return PortMask(rest...) | (1 << n); } -class MKLDNNNode::NodesFactory : public openvino::cc::Factory& op, - const mkldnn::engine &, - MKLDNNWeightsSharing::Ptr &)> { +class Node::NodesFactory : public openvino::cc::Factory& op, + const mkldnn::engine &, + WeightsSharing::Ptr &)> { public: NodesFactory(); - MKLDNNNode* create(const std::shared_ptr& op, const mkldnn::engine& eng, - const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache); + Node* create(const std::shared_ptr& op, const mkldnn::engine& eng, + const ExtensionManager::Ptr& extMgr, WeightsSharing::Ptr &w_cache); }; -template -struct MKLDNNNodeImpl : public MKLDNNNodeType { - MKLDNNNodeImpl(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNodeType(op, eng, cache) { - MKLDNNNodeType::perfCounters().template buildClassCounters(NameFromType(MKLDNNNodeType::getType())); +template +struct NodeImpl : public NodeType { + NodeImpl(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : NodeType(op, eng, cache) { + NodeType::perfCounters().template buildClassCounters(NameFromType(NodeType::getType())); } }; } // namespace intel_cpu } // namespace ov - -#define REG_MKLDNN_PRIM_FOR(__prim, __type) diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp index 1bf8d92ffc9..3c279fba15d 100644 --- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -16,12 +16,15 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl::cpu::x64; -bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool AdaptivePooling::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::get_type_info_static())) { auto adaPool = std::dynamic_pointer_cast(op); @@ -45,8 +48,8 @@ bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +AdaptivePooling::AdaptivePooling(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Adaptive Pooling layer with name '" + getName() + "' "; @@ -62,13 +65,13 @@ MKLDNNAdaptivePoolingNode::MKLDNNAdaptivePoolingNode(const std::shared_ptr(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr()); for (size_t i = 0; i < spatialDimsCount; i++) { if (spatialDimsValue[i] != newSpatialDimsPtr[i]) return true; } - return MKLDNNNode::needShapeInfer(); + return Node::needShapeInfer(); } -std::vector 
MKLDNNAdaptivePoolingNode::shapeInfer() const { +std::vector AdaptivePooling::shapeInfer() const { const auto inputDims = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims(); const auto spatialDims = getParentEdgesAtPort(1)[0]->getMemory().GetShape().getStaticDims(); const auto inputRank = inputDims.size(); @@ -113,7 +116,7 @@ std::vector MKLDNNAdaptivePoolingNode::shapeInfer() const { return result; } -void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { +void AdaptivePooling::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -145,11 +148,11 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNAdaptivePoolingNode::executeDynamicImpl(mkldnn::stream strm) { +void AdaptivePooling::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { +void AdaptivePooling::execute(mkldnn::stream strm) { auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType(); auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType(); if (!(inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32)) @@ -283,13 +286,15 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { }}); } -bool MKLDNNAdaptivePoolingNode::created() const { - return getType() == AdaptivePooling; +bool AdaptivePooling::created() const { + return getType() == Type::AdaptivePooling; } -inline void MKLDNNAdaptivePoolingNode::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) { +inline void AdaptivePooling::setBinBorders(size_t *startPtr, size_t *endPtr, size_t idx, size_t inputLength, size_t outputLength) { *(startPtr) = idx * inputLength / outputLength; *(endPtr) = ceil(static_cast((idx + 1) * inputLength) / outputLength); } -REG_MKLDNN_PRIM_FOR(MKLDNNAdaptivePoolingNode, AdaptivePooling) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h index 8967ffbd84e..59fc2612302 100644 --- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.h @@ -8,14 +8,15 @@ #include #include #include -#include +#include namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNAdaptivePoolingNode : public MKLDNNNode { +class AdaptivePooling : public Node { public: - MKLDNNAdaptivePoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + AdaptivePooling(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -39,5 +40,6 @@ protected: void executeDynamicImpl(mkldnn::stream strm) override; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp index ba0a07a37ad..965378565a3 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp @@ -11,10 +11,13 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool BatchToSpace::isSupportedOperation(const std::shared_ptr& op, std::string& 
errorMessage) noexcept { try { const auto batchToSpace = std::dynamic_pointer_cast(op); if (!batchToSpace) { @@ -33,8 +36,8 @@ bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +BatchToSpace::BatchToSpace(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -56,7 +59,7 @@ MKLDNNBatchToSpaceNode::MKLDNNBatchToSpaceNode(const std::shared_ptr(op->get_input_node_shared_ptr(2))->cast_vector(); } -void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() { +void BatchToSpace::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -96,8 +99,8 @@ void MKLDNNBatchToSpaceNode::initSupportedPrimitiveDescriptors() { } } -std::vector MKLDNNBatchToSpaceNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(1, 2, 3)); +std::vector BatchToSpace::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(1, 2, 3)); } static std::vector getShape5D(const SizeVector &shape) { @@ -111,7 +114,7 @@ static std::vector getShape5D(const SizeVector &shape) { } template -void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { +void BatchToSpace::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -229,11 +232,11 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { }); } -void MKLDNNBatchToSpaceNode::executeDynamicImpl(mkldnn::stream strm) { +void BatchToSpace::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { +void BatchToSpace::execute(mkldnn::stream strm) { switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) { case 1: batchToSpaceKernel::value_type>(); break; case 2: batchToSpaceKernel::value_type>(); break; @@ -244,8 +247,10 @@ void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { } } -bool MKLDNNBatchToSpaceNode::created() const { - return getType() == BatchToSpace; +bool BatchToSpace::created() const { + return getType() == Type::BatchToSpace; } -REG_MKLDNN_PRIM_FOR(MKLDNNBatchToSpaceNode, BatchToSpace) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.h b/src/plugins/intel_cpu/src/nodes/batch_to_space.h index 199aa289af1..33ce1100ab9 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNBatchToSpaceNode : public MKLDNNNode { +class BatchToSpace : public Node { public: - MKLDNNBatchToSpaceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + BatchToSpace(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -39,5 +40,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp index fa6854fac7a..bdac1041ae1 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "ie_parallel.hpp" #include "cpu/x64/jit_generator.hpp" #include "cpu/x64/injectors/jit_uni_eltwise_injector.hpp" @@ -31,7 +31,6 @@ # endif #endif -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; @@ -40,6 +39,10 @@ using namespace mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; using namespace Xbyak; +namespace ov { +namespace intel_cpu { +namespace node { + #define GET_OFF(field) offsetof(jit_bin_conv_call_args, field) template @@ -103,13 +106,13 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ solve_common(1, jcp_.oc_block); sub(reg_oc_work, jcp_.oc_block); - add(reg_kernel_base, jcp_.oc_block * jcp_.nb_ic * jcp_.kh * jcp_.kw * ov::intel_cpu::div_up(jcp_.ic_block, nbits) * jcp_.typesize_in); + add(reg_kernel_base, jcp_.oc_block * jcp_.nb_ic * jcp_.kh * jcp_.kw * div_up(jcp_.ic_block, nbits) * jcp_.typesize_in); if (jcp_.with_dw_conv) { add(reg_output_base, jcp_.oc_block * jcp_dw_conv_.kh * jcp_.ow * jcp_.typesize_out); } else { if (jcp_.with_binarization) - add(reg_output_base, ov::intel_cpu::div_up(jcp_.oc_block, nbits) * jcp_.typesize_out); + add(reg_output_base, div_up(jcp_.oc_block, nbits) * jcp_.typesize_out); else add(reg_output_base, jcp_.oc_block * jcp_.typesize_out); } @@ -315,16 +318,16 @@ private: int nbits = 8; for (int ki = 0; ki < kw; ki++) { - int jj_start = nstl::max(0, ov::intel_cpu::div_up(pad_l - ki * dilate_w, stride_w)); - int jj_end = ur_w - nstl::max(0, ov::intel_cpu::div_up(ki*dilate_w+pad_r-(kw-1)*dilate_w, stride_w)); + int jj_start = nstl::max(0, div_up(pad_l - ki * dilate_w, stride_w)); + int jj_end = ur_w - nstl::max(0, div_up(ki*dilate_w+pad_r-(kw-1)*dilate_w, stride_w)); int _start = (!jcp_.exclude_pad) ? 0 : jj_start; int _end = (!jcp_.exclude_pad) ? 
ur_w : jj_end; for (int ifm2 = 0; ifm2 < ic_blocks; ifm2++) { for (int jj = _start; jj < _end; jj++) { - int inp_off = ((ki*dilate_w + jj*stride_w - pad_l)*ov::intel_cpu::div_up(jcp_.ic, nbits) + - ifm2 * ov::intel_cpu::div_up(ic_blk, nbits)) * jcp_.typesize_in; + int inp_off = ((ki*dilate_w + jj*stride_w - pad_l)*div_up(jcp_.ic, nbits) + + ifm2 * div_up(ic_blk, nbits)) * jcp_.typesize_in; if (h_padded || jj < jj_start || jj >= jj_end) { uni_vmovups(vmm_src, ptr[reg_table + 8 * vlen]); @@ -334,10 +337,10 @@ private: for (int r = 0; r < repeats; r++) { for (int ii = 0; ii < oc_blocks; ii++) { - int ker_off = (ifm2 * kh * kw * ov::intel_cpu::div_up(ic_blk, nbits) * oc_blk - + ii * jcp_.nb_ic * ov::intel_cpu::div_up(ic_blk, nbits) * kh * kw * oc_blk - + ki * ov::intel_cpu::div_up(ic_blk, nbits) * oc_blk - + r * ov::intel_cpu::div_up(ic_blk, nbits) * (oc_blk / 2)) * jcp_.typesize_in; + int ker_off = (ifm2 * kh * kw * div_up(ic_blk, nbits) * oc_blk + + ii * jcp_.nb_ic * div_up(ic_blk, nbits) * kh * kw * oc_blk + + ki * div_up(ic_blk, nbits) * oc_blk + + r * div_up(ic_blk, nbits) * (oc_blk / 2)) * jcp_.typesize_in; uni_vmovups(vmm_tmp, ptr[aux1_reg_kernel + ker_off]); @@ -393,7 +396,7 @@ private: int kw = jcp_.kw; int nbits = 8; - int inp_mult = ov::intel_cpu::div_up(jcp_.ic_block, nbits); + int inp_mult = div_up(jcp_.ic_block, nbits); int out_mult = jcp_.oc_block; Label icb_main_loop; @@ -427,7 +430,7 @@ private: int dilate_h = jcp_.dilate_h + 1; int nbits = 8; - const int inp_mult = dilate_h * ov::intel_cpu::div_up(jcp_.ic, nbits); + const int inp_mult = dilate_h * div_up(jcp_.ic, nbits); Label t_overflow_label, no_t_overflow_label, b_overflow_label, no_b_overflow_label; @@ -447,7 +450,7 @@ private: L(t_overflow_label); { oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, true); - add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * ov::intel_cpu::div_up(jcp_.ic_block, nbits)); + add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * div_up(jcp_.ic_block, nbits)); dec(reg_overflow); cmp(reg_overflow, 0); jg(t_overflow_label, T_NEAR); @@ -468,7 +471,7 @@ private: { oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, false); - add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * ov::intel_cpu::div_up(jcp_.ic_block, nbits)); + add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * div_up(jcp_.ic_block, nbits)); add(aux_reg_input, jcp_.typesize_in * iw * inp_mult); dec(reg_kh); @@ -485,7 +488,7 @@ private: L(b_overflow_label); { oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, true); - add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * ov::intel_cpu::div_up(jcp_.ic_block, nbits)); + add(aux_reg_kernel, jcp_.typesize_in * kw * jcp_.oc_block * div_up(jcp_.ic_block, nbits)); dec(reg_overflow); cmp(reg_overflow, 0); jg(b_overflow_label, T_NEAR); @@ -528,8 +531,8 @@ private: kw_padding[jj] = 0; for (int ki = 0; ki < jcp_.kw; ki++) { - int jj_start = nstl::max(0, ov::intel_cpu::div_up(pad_l - ki * (jcp_.dilate_w + 1), jcp_.stride_w)); - int jj_end = ur_w - nstl::max(0, ov::intel_cpu::div_up(ki * (jcp_.dilate_w + 1) + pad_r - + int jj_start = nstl::max(0, div_up(pad_l - ki * (jcp_.dilate_w + 1), jcp_.stride_w)); + int jj_end = ur_w - nstl::max(0, div_up(ki * (jcp_.dilate_w + 1) + pad_r - (jcp_.kw - 1) * (jcp_.dilate_w + 1), jcp_.stride_w)); for (int jj = jj_start; jj < jj_end; jj++) { kw_padding[jj]++; @@ -677,10 +680,10 @@ private: if (r == repeats - 1) { if (isa == x64::avx512_common && oc_step > nbits) { - const size_t o_off = (2 * ii + jj * 
ov::intel_cpu::div_up(jcp_.oc, nbits)); + const size_t o_off = (2 * ii + jj * div_up(jcp_.oc, nbits)); mov(ptr[reg_output + o_off * jcp_.typesize_out], reg_tmp_16); } else { - const size_t o_off = (ii + jj * ov::intel_cpu::div_up(jcp_.oc, nbits)); + const size_t o_off = (ii + jj * div_up(jcp_.oc, nbits)); mov(ptr[reg_output + o_off * jcp_.typesize_out], reg_tmp_8); } } @@ -754,8 +757,8 @@ private: int str_w = jcp_.stride_w; int nbits = 8; - const int inp_mult = ov::intel_cpu::div_up(jcp_.ic, nbits); - const int out_mult = jcp_.with_dw_conv ? jcp_.oc_block : jcp_.with_binarization ? ov::intel_cpu::div_up(jcp_.oc, nbits) : jcp_.oc; + const int inp_mult = div_up(jcp_.ic, nbits); + const int out_mult = jcp_.with_dw_conv ? jcp_.oc_block : jcp_.with_binarization ? div_up(jcp_.oc, nbits) : jcp_.oc; int l_pad = jcp_.l_pad; int r_pad = nstl::max(0, (jcp_.ow - 1) * str_w + (kw - 1) * dilate_w @@ -872,7 +875,7 @@ private: } }; -bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool BinaryConvolution::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -894,9 +897,9 @@ bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr& op, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +BinaryConvolution::BinaryConvolution(const std::shared_ptr& op, + const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "BinaryConvolution node with name '" + getName() + "' "; @@ -926,15 +929,15 @@ MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const std::shared_ptr(fusedWith[i].get()); + auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { withSum = true; expectedInputEdgesNum++; @@ -960,7 +963,7 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { } } -void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { +void BinaryConvolution::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -1015,7 +1018,7 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNBinaryConvolutionNode::createPrimitive() { +void BinaryConvolution::createPrimitive() { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; @@ -1079,7 +1082,7 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { auto srcPrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision(); auto dstPrecision = getChildEdgeAt(0)->getMemory().getDesc().getPrecision(); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision); + jcp.dst_dt = DnnlExtensionUtils::IEPrecisionToDataType(dstPrecision); jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size(); jcp.typesize_out = dstPrecision == Precision::BIN ? 
1 : dstPrecision.size(); @@ -1102,16 +1105,16 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { bin_conv_kernel->create_ker(); } -bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { +bool BinaryConvolution::canFuse(const NodePtr& node) const { if (implType == impl_desc_type::ref) return false; // Binarization have to be last operation in fusing chain - if (isFusedWith(FakeQuantize)) + if (isFusedWith(Type::FakeQuantize)) return false; - if (node->getType() == FakeQuantize) { - bool ret = node->getAlgorithm() == FQBinarization; + if (node->getType() == Type::FakeQuantize) { + bool ret = node->getAlgorithm() == Algorithm::FQBinarization; for (size_t i = 1; i < node->getParentEdges().size(); i++) { ret &= node->getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() == 1; } @@ -1121,12 +1124,12 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { } } -void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { +void BinaryConvolution::setPostOps(mkldnn::primitive_attr &attr) { mkldnn::post_ops ops; postOpsDataPtrs.clear(); for (auto &node : fusedWith) { - auto* eltwiseNode = dynamic_cast(node.get()); + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { if (eltwiseNode->isSpecialConvolutionAddFusing()) { ops.append_sum(1.0); @@ -1137,7 +1140,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { continue; } - auto* fakeQuantizeNode = dynamic_cast(node.get()); + auto* fakeQuantizeNode = dynamic_cast(node.get()); if (fakeQuantizeNode) { fakeQuantizeNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), postOpsDataPtrs); continue; @@ -1149,13 +1152,13 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { attr.set_post_ops(ops); } -void MKLDNNBinaryConvolutionNode::executeOptimized(const uint8_t* src, const uint8_t* weights, uint8_t* dst, +void BinaryConvolution::executeOptimized(const uint8_t* src, const uint8_t* weights, uint8_t* dst, const std::vector& s_str, const std::vector& w_str, const std::vector& d_str) { auto dst_f32 = reinterpret_cast(dst); const int MB = jcp.mb; - int ocb_work = ov::intel_cpu::div_up(jcp.nb_oc, jcp.nb_oc_blocking); + int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking); int nbits = 8; parallel_for4d(MB, jcp.ngroups, ocb_work, jcp.oh, [&](int n, int g, int ocbb, int oh) { @@ -1165,8 +1168,8 @@ void MKLDNNBinaryConvolutionNode::executeOptimized(const uint8_t* src, const uin auto par_conv = jit_bin_conv_call_args(); const int ij = oh * jcp.stride_h; - const int i_t_overflow = nstl::min(jcp.kh, ov::intel_cpu::div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1))); - const int i_b_overflow = nstl::min(jcp.kh, ov::intel_cpu::div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) - + const int i_t_overflow = nstl::min(jcp.kh, div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1))); + const int i_b_overflow = nstl::min(jcp.kh, div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) - jcp.t_pad+1) - jcp.ih, (jcp.dilate_h + 1))); const size_t _oc = g * jcp.nb_oc + ocb; @@ -1199,7 +1202,7 @@ void MKLDNNBinaryConvolutionNode::executeOptimized(const uint8_t* src, const uin }); } -void MKLDNNBinaryConvolutionNode::executeReference(const uint8_t* src, const uint8_t* weights, uint8_t* dst, +void BinaryConvolution::executeReference(const uint8_t* src, const uint8_t* weights, uint8_t* dst, const std::vector& s_str, const std::vector& w_str, const std::vector& d_str) { auto dst_fp = reinterpret_cast(dst); @@ -1276,12 
+1279,12 @@ void MKLDNNBinaryConvolutionNode::executeReference(const uint8_t* src, const uin const int i_left_overflow = nstl::max(0, (padL - ow * KSW)); const int i_right_overflow = nstl::max(IW, (ow * KSW + (KW - 1) * (KDW + 1) - padL + 1)) - IW; const int kw_padding = - KW - ov::intel_cpu::div_up(i_left_overflow, (KDW + 1)) - ov::intel_cpu::div_up(i_right_overflow, (KDW + 1)); + KW - div_up(i_left_overflow, (KDW + 1)) - div_up(i_right_overflow, (KDW + 1)); const int i_top_overflow = nstl::max(0, (padT - oh * KSH)); const int i_bottom_overflow = nstl::max(IH, (oh * KSH + (KH - 1) * (KDH + 1) - padT + 1)) - IH; const int kh_padding = - KH - ov::intel_cpu::div_up(i_top_overflow, (KDH + 1)) - ov::intel_cpu::div_up(i_bottom_overflow, (KDH + 1)); + KH - div_up(i_top_overflow, (KDH + 1)) - div_up(i_bottom_overflow, (KDH + 1)); base_value = IC * kh_padding * kw_padding; } else { @@ -1294,7 +1297,7 @@ void MKLDNNBinaryConvolutionNode::executeReference(const uint8_t* src, const uin }); } -void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { +void BinaryConvolution::execute(mkldnn::stream strm) { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &weightsMemory = getParentEdgeAt(1)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1333,8 +1336,10 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { } } -bool MKLDNNBinaryConvolutionNode::created() const { - return getType() == BinaryConvolution; +bool BinaryConvolution::created() const { + return getType() == Type::BinaryConvolution; } -REG_MKLDNN_PRIM_FOR(MKLDNNBinaryConvolutionNode, BinaryConvolution); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.h b/src/plugins/intel_cpu/src/nodes/bin_conv.h index 0106c82486d..c0f40a0f2f0 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.h +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.h @@ -12,6 +12,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_bin_conv_params { int mb; @@ -74,9 +75,9 @@ struct jit_uni_bin_conv_kernel { const mkldnn_primitive_attr &attr_; }; -class MKLDNNBinaryConvolutionNode : public MKLDNNNode { +class BinaryConvolution : public Node { public: - MKLDNNBinaryConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + BinaryConvolution(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void createPrimitive() override; @@ -87,7 +88,7 @@ public: return false; } void setPostOps(mkldnn::primitive_attr &attr); - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -122,5 +123,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.cpp b/src/plugins/intel_cpu/src/nodes/broadcast.cpp index b5b52918e1e..3289068fd2b 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.cpp +++ b/src/plugins/intel_cpu/src/nodes/broadcast.cpp @@ -14,10 +14,13 @@ #include #include "common/cpu_memcpy.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Broadcast::isSupportedOperation(const 
std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!ov::is_type(op)) { errorMessage = "Only Broadcast operations from opset1 are supported."; @@ -46,8 +49,8 @@ bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +Broadcast::Broadcast(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -81,7 +84,7 @@ MKLDNNBroadcastNode::MKLDNNBroadcastNode(const std::shared_ptr& op, co } } -void MKLDNNBroadcastNode::getSupportedDescriptors() { +void Broadcast::getSupportedDescriptors() { if (!isDynamicNode()) { const auto& srcDims = getInputShapeAtPort(INPUT_DATA_IDX).getDims(); repeats.assign(targetShape.begin(), targetShape.end()); @@ -100,18 +103,18 @@ void MKLDNNBroadcastNode::getSupportedDescriptors() { } } -void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() { +void Broadcast::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; supportedPrimitiveDescriptors = getSupportedConfigs(this); } -bool MKLDNNBroadcastNode::needPrepareParams() const { +bool Broadcast::needPrepareParams() const { return needPrepareParamsVar; } -void MKLDNNBroadcastNode::prepareParams() { +void Broadcast::prepareParams() { if (!constMap[TARGET_SHAPE_IDX]) { const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory(); const int32_t* targetShapeData = reinterpret_cast(targetShapeMem.GetPtr()); @@ -149,7 +152,7 @@ void MKLDNNBroadcastNode::prepareParams() { optimizedCase = prepareOptimizedParams(this, srcBlockedDims, dstBlockedDims); } -bool MKLDNNBroadcastNode::needShapeInfer() const { +bool Broadcast::needShapeInfer() const { needPrepareParamsVar = true; if (inputShapesModified()) { return true; @@ -181,19 +184,19 @@ bool MKLDNNBroadcastNode::needShapeInfer() const { return false; } -std::vector MKLDNNBroadcastNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX)); +std::vector Broadcast::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX)); } -bool MKLDNNBroadcastNode::isExecutable() const { +bool Broadcast::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -void MKLDNNBroadcastNode::executeDynamicImpl(mkldnn::stream strm) { +void Broadcast::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { +void Broadcast::execute(mkldnn::stream strm) { if (optimizedCase) { optimizedExecute(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr()); } else { @@ -201,7 +204,7 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { } } -void MKLDNNBroadcastNode::plainExecute(mkldnn::stream strm) { +void Broadcast::plainExecute(mkldnn::stream strm) { VectorDims srcDims = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getStaticDims(); const auto& dstDims = getChildEdgeAt(0)->getMemory().getStaticDims(); const auto& dataSrcRank = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetShape().getRank(); @@ -257,8 +260,10 @@ void MKLDNNBroadcastNode::plainExecute(mkldnn::stream strm) { }); } -bool MKLDNNBroadcastNode::created() const { - return getType() == Broadcast; +bool Broadcast::created() const { + return getType() == Type::Broadcast; } 
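// The Broadcast and BatchToSpace hunks above delegate shape inference to
// Node::shapeInferGeneric() with a bit mask built by the variadic PortMask()
// helper declared in node.h: each argument is an input-port index whose bit
// marks that port as shape-defining. A minimal, self-contained sketch of that
// helper follows; the 1ULL widening and the main() driver are illustrative
// additions rather than plugin code.
#include <cstdint>

constexpr uint64_t PortMask() { return 0; }

template <typename... T>
constexpr uint64_t PortMask(int n, T... rest) {
    // Set the bit for port n, then fold in the remaining port indices.
    return PortMask(rest...) | (1ULL << n);
}

int main() {
    // BatchToSpace::shapeInfer() passes PortMask(1, 2, 3): ports 1..3 carry
    // shape-defining values, so bits 1..3 are set in the mask.
    static_assert(PortMask(1, 2, 3) == 0b1110u, "ports 1, 2 and 3 selected");
    // Broadcast uses PortMask(TARGET_SHAPE_IDX, AXES_MAPPING_IDX) the same way.
    return 0;
}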
-REG_MKLDNN_PRIM_FOR(MKLDNNBroadcastNode, Broadcast) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.h b/src/plugins/intel_cpu/src/nodes/broadcast.h index 9e9c711a52c..3b55e29f032 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.h +++ b/src/plugins/intel_cpu/src/nodes/broadcast.h @@ -10,13 +10,13 @@ #include #include - namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNBroadcastNode : public MKLDNNNode, public TileBroadcastCommon { +class Broadcast : public Node, public TileBroadcastCommon { public: - MKLDNNBroadcastNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Broadcast(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -52,5 +52,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index ea9197cec04..ba30fdd1581 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -10,10 +10,13 @@ #include "ie_parallel.hpp" #include "bucketize.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Bucketize::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto bucketsize = std::dynamic_pointer_cast(op); if (!bucketsize) { @@ -26,8 +29,8 @@ bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +Bucketize::Bucketize(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -47,7 +50,7 @@ MKLDNNBucketizeNode::MKLDNNBucketizeNode(const std::shared_ptr& op with_right = bucketsize->get_with_right_bound(); } -void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() { +void Bucketize::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -73,7 +76,7 @@ void MKLDNNBucketizeNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNBucketizeNode::execute(mkldnn::stream strm) { +void Bucketize::execute(mkldnn::stream strm) { auto precision_mask = getPrecisionMask(input_precision, boundaries_precision, output_precision); switch (precision_mask) { @@ -172,7 +175,7 @@ void MKLDNNBucketizeNode::execute(mkldnn::stream strm) { } } -void MKLDNNBucketizeNode::prepareParams() { +void Bucketize::prepareParams() { auto& inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr(); auto& inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr(); auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); @@ -203,16 +206,16 @@ void MKLDNNBucketizeNode::prepareParams() { std::accumulate(input_tensor_dims.begin(), input_tensor_dims.end(), size_t(1), std::multiplies()); } -bool MKLDNNBucketizeNode::isExecutable() const { +bool Bucketize::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -std::vector MKLDNNBucketizeNode::shapeInfer() const { 
+std::vector Bucketize::shapeInfer() const { return {getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()}; } template -void MKLDNNBucketizeNode::bucketize() { +void Bucketize::bucketize() { const auto *input_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *boundaries_data = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *output_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); @@ -235,8 +238,10 @@ void MKLDNNBucketizeNode::bucketize() { }); } -bool MKLDNNBucketizeNode::created() const { - return getType() == Bucketize; +bool Bucketize::created() const { + return getType() == Type::Bucketize; } -REG_MKLDNN_PRIM_FOR(MKLDNNBucketizeNode, Bucketize) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.h b/src/plugins/intel_cpu/src/nodes/bucketize.h index ae3be644ac3..ada09d25403 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.h +++ b/src/plugins/intel_cpu/src/nodes/bucketize.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNBucketizeNode : public MKLDNNNode { +class Bucketize : public Node { public: - MKLDNNBucketizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Bucketize(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -47,5 +48,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.cpp b/src/plugins/intel_cpu/src/nodes/color_convert.cpp index 1e9bcfe7720..36f20d4d04f 100644 --- a/src/plugins/intel_cpu/src/nodes/color_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/color_convert.cpp @@ -19,6 +19,7 @@ using namespace Xbyak; namespace ov { namespace intel_cpu { +namespace node { namespace { std::tuple getAlgorithmFor(const std::shared_ptr& op) { @@ -33,11 +34,11 @@ std::tuple getAlgorithmFor(const std::shared_ptrget_type_name() + " is not supported."); } -class Converter : public MKLDNNColorConvertNode::Converter { - using Base = MKLDNNColorConvertNode::Converter; +class Converter : public ColorConvert::Converter { + using Base = ColorConvert::Converter; public: - Converter(MKLDNNNode *node); + Converter(Node *node); Shapes shapeInfer() const override; bool singlePlane() const; @@ -46,14 +47,14 @@ public: std::tuple yuv_to_rgb(float y, float u, float v); }; -Converter::Converter(MKLDNNNode *node) +Converter::Converter(Node *node) : Base(node, node->getAlgorithm() == Algorithm::ColorConvertNV12toRGB || node->getAlgorithm() == Algorithm::ColorConvertI420toRGB ? ColorFormat { { 0, 1, 2 } } : ColorFormat { { 2, 1, 0 } }) { } -MKLDNNColorConvertNode::Converter::Shapes +ColorConvert::Converter::Shapes Converter::shapeInfer() const { const auto & dims = inputDims(0); if (dims.size() != 4) @@ -275,14 +276,14 @@ void jit_uni_converter::store_tail(const variable & dst, namespace nv12 { -MKLDNNColorConvertNode::Converter::PrimitiveDescs supportedPrimitiveDescs(MKLDNNNode *node) { +ColorConvert::Converter::PrimitiveDescs supportedPrimitiveDescs(Node *node) { const LayoutType layout = LayoutType::ncsp; // 0,1,2,3 const Precision precision = node->getOriginalInputPrecisionAtPort(0) == Precision::U8 ? 
Precision::U8 : Precision::FP32; - MKLDNNColorConvertNode::Converter::PrimitiveDescs descs; + ColorConvert::Converter::PrimitiveDescs descs; descs.emplace_back(std::vector { node->getOriginalInputsNumber(), { layout, precision } }, std::vector { { layout, precision } }, @@ -301,7 +302,7 @@ class TwoPlaneConvert; class RefConverter : public Converter { public: - RefConverter(MKLDNNNode *node); + RefConverter(Node *node); protected: template @@ -315,7 +316,7 @@ protected: size_t stride_uv); }; -RefConverter::RefConverter(MKLDNNNode *node) +RefConverter::RefConverter(Node *node) : Converter(node) { if (node->getOriginalInputsNumber() != (singlePlane() ? 1: 2)) IE_THROW() <<"NV12Converter node has incorrect number of inputs"; @@ -553,7 +554,7 @@ const jit_uni_converter & jit_converter_get() { template class SinglePlaneConvert : public Converter { public: - SinglePlaneConvert(MKLDNNNode *node) + SinglePlaneConvert(Node *node) : Converter(node) { jit_converter_create(); } @@ -588,7 +589,7 @@ public: template class TwoPlaneConvert : public Converter { public: - TwoPlaneConvert(MKLDNNNode *node) + TwoPlaneConvert(Node *node) : Converter(node) { jit_converter_create(); } @@ -624,14 +625,14 @@ public: namespace i420 { -MKLDNNColorConvertNode::Converter::PrimitiveDescs supportedPrimitiveDescs(MKLDNNNode *node) { +ColorConvert::Converter::PrimitiveDescs supportedPrimitiveDescs(Node *node) { const LayoutType layout = LayoutType::ncsp; // 0,1,2,3 const Precision precision = node->getOriginalInputPrecisionAtPort(0) == Precision::U8 ? Precision::U8 : Precision::FP32; - MKLDNNColorConvertNode::Converter::PrimitiveDescs descs; + ColorConvert::Converter::PrimitiveDescs descs; descs.emplace_back(std::vector { node->getOriginalInputsNumber(), { layout, precision } }, std::vector { { layout, precision } }, @@ -650,7 +651,7 @@ class ThreePlaneConvert; class RefConverter : public Converter { public: - RefConverter(MKLDNNNode *node); + RefConverter(Node *node); protected: template @@ -665,7 +666,7 @@ protected: size_t stride_uv); }; -RefConverter::RefConverter(MKLDNNNode *node) +RefConverter::RefConverter(Node *node) : Converter(node) { if (node->getOriginalInputsNumber() != (singlePlane() ? 
1: 3)) IE_THROW() <<"I420Converter node has incorrect number of inputs"; @@ -902,7 +903,7 @@ const jit_uni_converter & jit_converter_get() { template class SinglePlaneConvert : public Converter { public: - SinglePlaneConvert(MKLDNNNode *node) + SinglePlaneConvert(Node *node) : Converter(node) { jit_converter_create(); } @@ -939,7 +940,7 @@ public: template class ThreePlaneConvert : public Converter { public: - ThreePlaneConvert(MKLDNNNode *node) + ThreePlaneConvert(Node *node) : Converter(node) { jit_converter_create(); } @@ -977,50 +978,50 @@ public: } // namespace -MKLDNNColorConvertNode::Converter::Converter(MKLDNNNode *node, const ColorFormat & colorFormat) +ColorConvert::Converter::Converter(Node *node, const ColorFormat & colorFormat) : _node(node) , _colorFormat(colorFormat) { } -InferenceEngine::Precision MKLDNNColorConvertNode::Converter::inputPrecision(size_t idx) const { +InferenceEngine::Precision ColorConvert::Converter::inputPrecision(size_t idx) const { return _node->getParentEdgesAtPort(idx)[0]->getMemory().getDesc().getPrecision(); } -InferenceEngine::Precision MKLDNNColorConvertNode::Converter::outputPrecision(size_t idx) const { +InferenceEngine::Precision ColorConvert::Converter::outputPrecision(size_t idx) const { return _node->getChildEdgesAtPort(idx)[0]->getMemory().getDesc().getPrecision(); } -const void * MKLDNNColorConvertNode::Converter::input(size_t idx) const { +const void * ColorConvert::Converter::input(size_t idx) const { return _node->getParentEdgeAt(idx)->getMemoryPtr()->GetPtr(); } -void * MKLDNNColorConvertNode::Converter::output(size_t idx) const { +void * ColorConvert::Converter::output(size_t idx) const { return _node->getChildEdgeAt(idx)->getMemoryPtr()->GetPtr(); } -const VectorDims & MKLDNNColorConvertNode::Converter::inputDims(size_t idx) const { +const VectorDims & ColorConvert::Converter::inputDims(size_t idx) const { return _node->getParentEdgesAtPort(idx)[0]->getMemory().getStaticDims(); } -bool MKLDNNColorConvertNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool ColorConvert::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { Algorithm alg; std::tie(alg, errorMessage) = getAlgorithmFor(op); return alg != Algorithm::Default; } -MKLDNNColorConvertNode::MKLDNNColorConvertNode(const std::shared_ptr& op, +ColorConvert::ColorConvert(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; std::tie(algorithm, errorMessage) = getAlgorithmFor(op); if (algorithm == Algorithm::Default) IE_THROW(NotImplemented) << errorMessage; } -void MKLDNNColorConvertNode::getSupportedDescriptors() {} +void ColorConvert::getSupportedDescriptors() {} -void MKLDNNColorConvertNode::initSupportedPrimitiveDescriptors() { +void ColorConvert::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -1054,9 +1055,9 @@ void MKLDNNColorConvertNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNColorConvertNode::initSupportedNV12Impls() { +void ColorConvert::initSupportedNV12Impls() { #define SUPPORTED_IMPL(Impl, type, desc_type) \ - [](MKLDNNNode *node) { \ + [](Node *node) { \ return new nv12::Impl(node); \ }; @@ -1081,9 +1082,9 @@ void MKLDNNColorConvertNode::initSupportedNV12Impls() { #undef SUPPORTED_IMPL } -void MKLDNNColorConvertNode::initSupportedI420Impls() { +void 
ColorConvert::initSupportedI420Impls() { #define SUPPORTED_IMPL(Impl, type, desc_type) \ - [](MKLDNNNode *node) { \ + [](Node *node) { \ return new i420::Impl(node); \ }; @@ -1108,7 +1109,7 @@ void MKLDNNColorConvertNode::initSupportedI420Impls() { #undef SUPPORTED_IMPL } -void MKLDNNColorConvertNode::createPrimitive() { +void ColorConvert::createPrimitive() { const NodeDesc *desc = getSelectedPrimitiveDescriptor(); if (!desc) IE_THROW() << getTypeStr() + " node with name '" + getName() + "' " @@ -1127,33 +1128,32 @@ void MKLDNNColorConvertNode::createPrimitive() { } } -void MKLDNNColorConvertNode::execute(mkldnn::stream strm) { +void ColorConvert::execute(mkldnn::stream strm) { if (!_impl) IE_THROW() << getTypeStr() + " node with name '" + getName() + "' " << "has no any implemented converter"; _impl->execute(strm); } -bool MKLDNNColorConvertNode::created() const { - return getType() == ColorConvert; +bool ColorConvert::created() const { + return getType() == Type::ColorConvert; } -std::vector MKLDNNColorConvertNode::shapeInfer() const { +std::vector ColorConvert::shapeInfer() const { if (!_impl) IE_THROW() << getTypeStr() + " node with name '" + getName() + "' " << "has no any implemented converter"; return _impl->shapeInfer(); } -bool MKLDNNColorConvertNode::needPrepareParams() const { +bool ColorConvert::needPrepareParams() const { return false; } -void MKLDNNColorConvertNode::executeDynamicImpl(mkldnn::stream strm) { +void ColorConvert::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -REG_MKLDNN_PRIM_FOR(MKLDNNColorConvertNode, ColorConvert); - +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.h b/src/plugins/intel_cpu/src/nodes/color_convert.h index 17c63e25d17..8d4134def0c 100644 --- a/src/plugins/intel_cpu/src/nodes/color_convert.h +++ b/src/plugins/intel_cpu/src/nodes/color_convert.h @@ -12,12 +12,13 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNColorConvertNode : public MKLDNNNode { +class ColorConvert : public Node { public: - MKLDNNColorConvertNode(const std::shared_ptr& op, - const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache); + ColorConvert(const std::shared_ptr& op, + const mkldnn::engine& eng, + WeightsSharing::Ptr &cache); class Converter; public: @@ -37,7 +38,7 @@ private: void initSupportedI420Impls(); private: - using ConverterBuilder = std::function; + using ConverterBuilder = std::function; using SupportedImpls = multidim_map, // Input port configurator std::vector, // Output port configurator @@ -63,7 +64,7 @@ public: using ColorFormat = std::array; - Converter(MKLDNNNode *node, const ColorFormat & colorFormat); + Converter(Node *node, const ColorFormat & colorFormat); virtual ~Converter() = default; InferenceEngine::Precision inputPrecision(size_t idx) const; InferenceEngine::Precision outputPrecision(size_t idx) const; @@ -74,9 +75,10 @@ public: virtual void execute(mkldnn::stream strm) = 0; protected: - MKLDNNNode *_node; + Node *_node; ColorFormat _colorFormat; // RGB: {0,1,2}, BGR: {2,1,0} }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/blocked_desc_creator.cpp b/src/plugins/intel_cpu/src/nodes/common/blocked_desc_creator.cpp index f5d82a838ee..19441d47402 100644 --- a/src/plugins/intel_cpu/src/nodes/common/blocked_desc_creator.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/blocked_desc_creator.cpp @@ -6,9 +6,11 @@ #include using namespace InferenceEngine; -using 
namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { namespace { + constexpr size_t channelsPos = 1lu; class PlainFormatCreator : public BlockedDescCreator { @@ -67,6 +69,7 @@ public: private: size_t _blockSize; }; + } // namespace const BlockedDescCreator::CreatorsMap& BlockedDescCreator::getCommonCreators() { @@ -119,3 +122,6 @@ BlockedDescCreator::makeFilteredRange(const CreatorsMap &map, BlockedDescCreator auto last = first.end(); return std::make_pair(first, last); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp index d671a26ecb6..468e2667cf2 100644 --- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.cpp @@ -17,12 +17,13 @@ #include #include "mkldnn/ie_mkldnn.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl::utils; using namespace mkldnn::impl::cpu::x64; using namespace Xbyak; +namespace ov { +namespace intel_cpu { namespace { template @@ -470,52 +471,52 @@ bool isConversionTruncatesRange(const Precision & from, const Precision & to) { } // namespace -#define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo::value_type, PrecisionInfo::value_type) +#define INTEL_CPU_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo::value_type, PrecisionInfo::value_type) -#define MKLDNN_CVT_LIST \ - MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \ - MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \ - MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \ - MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \ - MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \ - MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \ - MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \ - MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \ - MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \ - MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \ - MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \ - MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \ - MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \ - MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \ - MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \ - MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \ - MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \ - MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \ - MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \ - MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \ - MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \ - MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \ - MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, 
I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \ - MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \ - MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \ - MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \ - MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \ - MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \ - MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \ - MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \ - MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \ - MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \ - MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \ - MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \ - MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \ - MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \ - MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \ - MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \ - MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \ - MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \ - MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \ - MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \ - MKLDNN_CVT(BOOL, BOOL) +#define INTEL_CPU_CVT_LIST \ + INTEL_CPU_CVT(U8, I8), INTEL_CPU_CVT(U8, U16), INTEL_CPU_CVT(U8, I16), INTEL_CPU_CVT(U8, U32), \ + INTEL_CPU_CVT(U8, I32), INTEL_CPU_CVT(U8, U64), INTEL_CPU_CVT(U8, I64), INTEL_CPU_CVT(U8, FP32), \ + INTEL_CPU_CVT(U8, FP16), INTEL_CPU_CVT(U8, BF16), INTEL_CPU_CVT(U8, FP64), INTEL_CPU_CVT(U8, BOOL), \ + INTEL_CPU_CVT(I8, U8), INTEL_CPU_CVT(I8, U16), INTEL_CPU_CVT(I8, I16), INTEL_CPU_CVT(I8, U32), \ + INTEL_CPU_CVT(I8, I32), INTEL_CPU_CVT(I8, U64), INTEL_CPU_CVT(I8, I64), INTEL_CPU_CVT(I8, FP32), \ + INTEL_CPU_CVT(I8, FP16), INTEL_CPU_CVT(I8, BF16), INTEL_CPU_CVT(I8, FP64), INTEL_CPU_CVT(I8, BOOL), \ + INTEL_CPU_CVT(U16, U8), INTEL_CPU_CVT(U16, I8), INTEL_CPU_CVT(U16, I16), INTEL_CPU_CVT(U16, U32), \ + INTEL_CPU_CVT(U16, I32), INTEL_CPU_CVT(U16, U64), INTEL_CPU_CVT(U16, I64), INTEL_CPU_CVT(U16, FP32), \ + INTEL_CPU_CVT(U16, FP16), INTEL_CPU_CVT(U16, BF16), INTEL_CPU_CVT(U16, FP64), INTEL_CPU_CVT(U16, BOOL), \ + INTEL_CPU_CVT(I16, U8), INTEL_CPU_CVT(I16, I8), INTEL_CPU_CVT(I16, U16), INTEL_CPU_CVT(I16, U32), \ + INTEL_CPU_CVT(I16, I32), INTEL_CPU_CVT(I16, U64), INTEL_CPU_CVT(I16, I64), INTEL_CPU_CVT(I16, FP32), \ + INTEL_CPU_CVT(I16, FP16), INTEL_CPU_CVT(I16, BF16), INTEL_CPU_CVT(I16, FP64), INTEL_CPU_CVT(I16, BOOL), \ + INTEL_CPU_CVT(U32, U8), INTEL_CPU_CVT(U32, I8), INTEL_CPU_CVT(U32, U16), INTEL_CPU_CVT(U32, I16), \ + INTEL_CPU_CVT(U32, I32), INTEL_CPU_CVT(U32, U64), INTEL_CPU_CVT(U32, I64), INTEL_CPU_CVT(U32, FP32), \ + INTEL_CPU_CVT(U32, FP16), INTEL_CPU_CVT(U32, BF16), INTEL_CPU_CVT(U32, FP64), INTEL_CPU_CVT(U32, BOOL), \ + INTEL_CPU_CVT(I32, U8), INTEL_CPU_CVT(I32, I8), 
INTEL_CPU_CVT(I32, U16), INTEL_CPU_CVT(I32, I16), \ + INTEL_CPU_CVT(I32, U32), INTEL_CPU_CVT(I32, U64), INTEL_CPU_CVT(I32, I64), INTEL_CPU_CVT(I32, FP32), \ + INTEL_CPU_CVT(I32, FP16), INTEL_CPU_CVT(I32, BF16), INTEL_CPU_CVT(I32, FP64), INTEL_CPU_CVT(I32, BOOL), \ + INTEL_CPU_CVT(U64, U8), INTEL_CPU_CVT(U64, I8), INTEL_CPU_CVT(U64, U16), INTEL_CPU_CVT(U64, I16), \ + INTEL_CPU_CVT(U64, U32), INTEL_CPU_CVT(U64, I32), INTEL_CPU_CVT(U64, I64), INTEL_CPU_CVT(U64, FP32), \ + INTEL_CPU_CVT(U64, FP16), INTEL_CPU_CVT(U64, BF16), INTEL_CPU_CVT(U64, FP64), INTEL_CPU_CVT(U64, BOOL), \ + INTEL_CPU_CVT(I64, U8), INTEL_CPU_CVT(I64, I8), INTEL_CPU_CVT(I64, U16), INTEL_CPU_CVT(I64, I16), \ + INTEL_CPU_CVT(I64, U32), INTEL_CPU_CVT(I64, I32), INTEL_CPU_CVT(I64, U64), INTEL_CPU_CVT(I64, FP32), \ + INTEL_CPU_CVT(I64, FP16), INTEL_CPU_CVT(I64, BF16), INTEL_CPU_CVT(I64, FP64), INTEL_CPU_CVT(I64, BOOL), \ + INTEL_CPU_CVT(FP32, U8), INTEL_CPU_CVT(FP32, I8), INTEL_CPU_CVT(FP32, U16), INTEL_CPU_CVT(FP32, I16), \ + INTEL_CPU_CVT(FP32, U32), INTEL_CPU_CVT(FP32, I32), INTEL_CPU_CVT(FP32, U64), INTEL_CPU_CVT(FP32, I64), \ + INTEL_CPU_CVT(FP32, FP16), INTEL_CPU_CVT(FP32, BF16), INTEL_CPU_CVT(FP32, FP64), INTEL_CPU_CVT(FP32, BOOL), \ + INTEL_CPU_CVT(FP16, U8), INTEL_CPU_CVT(FP16, I8), INTEL_CPU_CVT(FP16, U16), INTEL_CPU_CVT(FP16, I16), \ + INTEL_CPU_CVT(FP16, U32), INTEL_CPU_CVT(FP16, I32), INTEL_CPU_CVT(FP16, U64), INTEL_CPU_CVT(FP16, I64), \ + INTEL_CPU_CVT(FP16, FP32), INTEL_CPU_CVT(FP16, BF16), INTEL_CPU_CVT(FP16, FP64), INTEL_CPU_CVT(FP16, BOOL), \ + INTEL_CPU_CVT(BF16, U8), INTEL_CPU_CVT(BF16, I8), INTEL_CPU_CVT(BF16, U16), INTEL_CPU_CVT(BF16, I16), \ + INTEL_CPU_CVT(BF16, U32), INTEL_CPU_CVT(BF16, I32), INTEL_CPU_CVT(BF16, U64), INTEL_CPU_CVT(BF16, I64), \ + INTEL_CPU_CVT(BF16, FP32), INTEL_CPU_CVT(BF16, FP16), INTEL_CPU_CVT(BF16, FP64), INTEL_CPU_CVT(BF16, BOOL), \ + INTEL_CPU_CVT(FP64, U8), INTEL_CPU_CVT(FP64, I8), INTEL_CPU_CVT(FP64, U16), INTEL_CPU_CVT(FP64, I16), \ + INTEL_CPU_CVT(FP64, U32), INTEL_CPU_CVT(FP64, I32), INTEL_CPU_CVT(FP64, U64), INTEL_CPU_CVT(FP64, I64), \ + INTEL_CPU_CVT(FP64, FP32), INTEL_CPU_CVT(FP64, FP16), INTEL_CPU_CVT(FP64, BF16), INTEL_CPU_CVT(FP64, BOOL), \ + INTEL_CPU_CVT(BOOL, U8), INTEL_CPU_CVT(BOOL, I8), INTEL_CPU_CVT(BOOL, U16), INTEL_CPU_CVT(BOOL, I16), \ + INTEL_CPU_CVT(BOOL, U32), INTEL_CPU_CVT(BOOL, I32), INTEL_CPU_CVT(BOOL, U64), INTEL_CPU_CVT(BOOL, I64), \ + INTEL_CPU_CVT(BOOL, FP32), INTEL_CPU_CVT(BOOL, FP16), INTEL_CPU_CVT(BOOL, BF16), INTEL_CPU_CVT(BOOL, FP64), \ + INTEL_CPU_CVT(U8, U8), INTEL_CPU_CVT(I8, I8), INTEL_CPU_CVT(U16, U16), INTEL_CPU_CVT(I16, I16), \ + INTEL_CPU_CVT(U32, U32), INTEL_CPU_CVT(I32, I32), INTEL_CPU_CVT(U64, U64), INTEL_CPU_CVT(I64, I64), \ + INTEL_CPU_CVT(FP32, FP32), INTEL_CPU_CVT(FP16, FP16), INTEL_CPU_CVT(BF16, BF16), INTEL_CPU_CVT(FP64, FP64), \ + INTEL_CPU_CVT(BOOL, BOOL) void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) { cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size); @@ -553,11 +554,14 @@ void cpu_convert(const void *srcPtr, dstPrc, false }; - OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST); + OV_SWITCH(intel_cpu, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), INTEL_CPU_CVT_LIST); if (!ctx.converted) IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc; } } -#undef MKLDNN_CVT -#undef MKLDNN_CVT_LIST +#undef INTEL_CPU_CVT +#undef INTEL_CPU_CVT_LIST + +} // namespace intel_cpu +} // namespace ov diff 
--git a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.h b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.h index c40936e1345..714f39c0034 100644 --- a/src/plugins/intel_cpu/src/nodes/common/cpu_convert.h +++ b/src/plugins/intel_cpu/src/nodes/common/cpu_convert.h @@ -4,6 +4,9 @@ #include +namespace ov { +namespace intel_cpu { + /** * @brief Copy size elements from buffer specified srcPtr pointer to buffer specified dstPtr. * If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed. @@ -48,3 +51,6 @@ void cpu_convert(const void *srcPtr, InferenceEngine::Precision interimPrc, InferenceEngine::Precision dstPrc, const size_t size); + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/cpu_memcpy.h b/src/plugins/intel_cpu/src/nodes/common/cpu_memcpy.h index bebe20d7771..955304e5313 100755 --- a/src/plugins/intel_cpu/src/nodes/common/cpu_memcpy.h +++ b/src/plugins/intel_cpu/src/nodes/common/cpu_memcpy.h @@ -7,6 +7,9 @@ #include #include "ie_api.h" +namespace ov { +namespace intel_cpu { + /** * @brief Copies bytes between buffers with security enhancements * Copies count bytes from src to dest. If the source and destination @@ -47,3 +50,6 @@ inline int cpu_memcpy_s(void* dst, size_t dst_size, const void* src, size_t coun #endif return 0; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp index 3eb7a8872d5..72a3d0554c7 100644 --- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp @@ -5,7 +5,9 @@ #include "dnnl_executor.h" using namespace mkldnn; -using namespace ov::intel_cpu; + +namespace ov { +namespace intel_cpu { DnnlExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descSrc, const mkldnn::memory::desc& descDst, @@ -47,3 +49,6 @@ void DnnlExecutor::exec(std::unordered_map primArgs, mkldnn bool DnnlExecutor::needReordering() const { return !inputReorders.empty() || !outputReorders.empty(); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h index 8d673b1bd02..43e38ee1a04 100644 --- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h +++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h @@ -32,7 +32,7 @@ class DnnlExecutor { protected: DnnlExecutor() = default; - MKLDNNPrimitive execPrim; + Primitive execPrim; // key is the port number for the primitive that needs memory reordering std::unordered_map inputReorders; std::unordered_map outputReorders; diff --git a/src/plugins/intel_cpu/src/nodes/common/fp16_utils.h b/src/plugins/intel_cpu/src/nodes/common/fp16_utils.h index 7d949ab63e9..d02b3e744fe 100644 --- a/src/plugins/intel_cpu/src/nodes/common/fp16_utils.h +++ b/src/plugins/intel_cpu/src/nodes/common/fp16_utils.h @@ -4,6 +4,9 @@ #pragma once +namespace ov { +namespace intel_cpu { + typedef short ie_fp16; // Function to convert F32 into F16 @@ -80,3 +83,5 @@ inline float f16tof32(ie_fp16 x) { return asfloat(u); } +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp b/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp index 53b7caa63a7..d0ee1a58554 100644 --- a/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/permute_kernel.cpp @@ -7,7 
+7,7 @@ #include #include #include -#include +#include #include "cpu_memcpy.h" #include "utils/bfloat16.hpp" @@ -15,7 +15,6 @@ #include using namespace InferenceEngine; -using namespace ov::intel_cpu; using namespace mkldnn; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; @@ -24,6 +23,9 @@ using namespace Xbyak; #define GET_OFF(field) offsetof(jit_args_permute, field) +namespace ov { +namespace intel_cpu { + template struct jit_uni_permute_kernel_f32 : public jit_uni_permute_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_permute_kernel_f32) @@ -410,3 +412,6 @@ bool PermuteParams::operator==(const PermuteParams& rhs) const { (dst_block_order == rhs.dst_block_order) && (order == rhs.order) && (data_size == rhs.data_size); } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/softmax.cpp b/src/plugins/intel_cpu/src/nodes/common/softmax.cpp index ea26f0e914e..236dc329773 100644 --- a/src/plugins/intel_cpu/src/nodes/common/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/softmax.cpp @@ -16,7 +16,6 @@ #include using namespace InferenceEngine; -using namespace ov::intel_cpu; using namespace mkldnn; using namespace mkldnn::impl::cpu; using namespace mkldnn::impl::cpu::x64; @@ -24,6 +23,9 @@ using namespace mkldnn::impl::utils; #define GET_OFF(field) offsetof(jit_args_softmax, field) +namespace ov { +namespace intel_cpu { + struct jit_args_softmax { const void* src; void* dst; @@ -325,3 +327,6 @@ void SoftmaxGeneric::execute(const uint8_t *src_data, uint8_t *dst_data, int B, IE_THROW() << "Unsupported input precision: " << input_prec.name(); } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/softmax.h b/src/plugins/intel_cpu/src/nodes/common/softmax.h index b1660282833..30e50b00391 100644 --- a/src/plugins/intel_cpu/src/nodes/common/softmax.h +++ b/src/plugins/intel_cpu/src/nodes/common/softmax.h @@ -10,6 +10,9 @@ #include "defs.h" #include "ie_parallel.hpp" +namespace ov { +namespace intel_cpu { + struct jit_uni_softmax_kernel; static inline @@ -51,3 +54,5 @@ private: std::shared_ptr softmax_kernel; }; +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp index b4e190df761..5e98bd78503 100644 --- a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp @@ -10,7 +10,9 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace InferenceEngine; -using namespace ov::intel_cpu; + +namespace ov { +namespace intel_cpu { VectorDims TileBroadcastCommon::calculateDenseStrides(const VectorDims &dims) { VectorDims strides(dims.size(), 1); @@ -87,10 +89,10 @@ bool TileBroadcastCommon::canBeExecutedInNSPCLayout(VectorDims srcBlockedDims, V return optimizedDims.size() <= maxNDims; } -std::vector TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode *node) { +std::vector TileBroadcastCommon::getSupportedConfigs(const Node *node) { std::vector supportedPrimitiveDescriptors; auto precision = node->getOriginalInputPrecisionAtPort(0); - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(precision); const auto& srcDims = node->getInputShapeAtPort(0).getDims(); const auto& inDataShape = node->getInputShapeAtPort(0); @@ -150,8 +152,8 @@ std::vector 
TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode } } - auto inFmt = MKLDNNExtensionUtils::GetPlainFormatByRank(inDataShape.getRank()); - auto outFmt = MKLDNNExtensionUtils::GetPlainFormatByRank(outDataShapeRank); + auto inFmt = DnnlExtensionUtils::GetPlainFormatByRank(inDataShape.getRank()); + auto outFmt = DnnlExtensionUtils::GetPlainFormatByRank(outDataShapeRank); if (inFmt == mkldnn::memory::format_tag::undef || outFmt == mkldnn::memory::format_tag::undef) { config.inConfs[0].setMemDesc(std::make_shared(precision, node->getInputShapeAtPort(0))); for (int i = 0; i < config.outConfs.size(); i++) { @@ -167,7 +169,7 @@ std::vector TileBroadcastCommon::getSupportedConfigs(const MKLDNNNode return supportedPrimitiveDescriptors; } -bool TileBroadcastCommon::prepareOptimizedParams(const MKLDNNNode *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims) { +bool TileBroadcastCommon::prepareOptimizedParams(const Node *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims) { while (srcBlockedDims.size() < dstBlockedDims.size()) { srcBlockedDims.insert(srcBlockedDims.begin(), 1); } @@ -244,7 +246,7 @@ void TileBroadcastCommon::broadcastScalar(const char *srcData, char *dstData, si } } -void TileBroadcastCommon::optimizedExecute(const MKLDNNMemoryPtr& srcMemory, const MKLDNNMemoryPtr& dstMemory) { +void TileBroadcastCommon::optimizedExecute(const MemoryPtr& srcMemory, const MemoryPtr& dstMemory) { auto srcData = reinterpret_cast(srcMemory->GetPtr()); auto dstData = reinterpret_cast(dstMemory->GetPtr()); @@ -287,3 +289,6 @@ void TileBroadcastCommon::optimizedExecute(const MKLDNNMemoryPtr& srcMemory, con }); } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.h b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.h index c10a3e03a0c..78916d83921 100644 --- a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.h +++ b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.h @@ -16,10 +16,10 @@ namespace intel_cpu { class TileBroadcastCommon { protected: static VectorDims calculateDenseStrides(const VectorDims &dims); - std::vector getSupportedConfigs(const MKLDNNNode *node); - bool prepareOptimizedParams(const MKLDNNNode *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims); + std::vector getSupportedConfigs(const Node *node); + bool prepareOptimizedParams(const Node *node, VectorDims& srcBlockedDims, VectorDims& dstBlockedDims); - void optimizedExecute(const MKLDNNMemoryPtr& srcMemory, const MKLDNNMemoryPtr& dstMemory); + void optimizedExecute(const MemoryPtr& srcMemory, const MemoryPtr& dstMemory); VectorDims repeats; bool optimizedCase = false; diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 3785d3c40a2..5944144c557 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include "mkldnn.hpp" #include "mkldnn/iml_type_mapper.h" @@ -24,18 +24,20 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { constexpr size_t channelAxis = 1lu; } -bool MKLDNNConcatNode::isExecutable() const { +bool Concat::isExecutable() const { return !hasEmptyOutputTensors() && !isOptimized(); } -bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool 
Concat::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto concatOp = ngraph::as_type_ptr(op); if (!concatOp) { @@ -48,8 +50,8 @@ bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Concat::Concat(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -67,7 +69,7 @@ MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, cons this->axis = axis; } -void MKLDNNConcatNode::getSupportedDescriptors() { +void Concat::getSupportedDescriptors() { const auto& firstParentDims = getInputShapeAtPort(0).getDims(); for (size_t i = 1; i < getParentEdges().size(); i++) { const auto& dims = getInputShapeAtPort(i).getDims(); @@ -94,7 +96,7 @@ void MKLDNNConcatNode::getSupportedDescriptors() { } } -void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { +void Concat::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -221,7 +223,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { +void Concat::selectOptimalPrimitiveDescriptor() { std::vector canSelectPrimitive; // The double connection marks that some tensor should @@ -337,22 +339,22 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { selectPrimitiveDescriptorByIndex(0); } -bool MKLDNNConcatNode::created() const { - return getType() == Concatenation; +bool Concat::created() const { + return getType() == Type::Concatenation; } -bool MKLDNNConcatNode::isOptimized() const { +bool Concat::isOptimized() const { return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].inPlace() >= 0; } -bool MKLDNNConcatNode::needPrepareParams() const { +bool Concat::needPrepareParams() const { if (canOptimizeNspc) { return false; } return inputShapesModified(); } -void MKLDNNConcatNode::prepareParams() { +void Concat::prepareParams() { if (canOptimizeNspc || isOptimized()) return; @@ -395,7 +397,7 @@ void MKLDNNConcatNode::prepareParams() { prim.reset(new concat(primitive_desc)); } -size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) { +size_t Concat::inverseOrder(const SizeVector& order, size_t axis) { for (size_t i = 0; i < order.size(); i++) { if (axis == order[i]) { return i; @@ -404,13 +406,13 @@ size_t MKLDNNConcatNode::inverseOrder(const SizeVector& order, size_t axis) { return -1; } -void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { +void Concat::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; if (!isOptimized()) { - MKLDNNNode::initOptimalPrimitiveDescriptor(); + Node::initOptimalPrimitiveDescriptor(); auto config = selected_pd->getConfig(); if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { @@ -486,12 +488,12 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { canOptimizeNspc = axis == channelAxis && getSelectedPrimitiveDescriptor()->getConfig().outConfs.front().getMemDesc()->hasLayoutType(LayoutType::nspc); } -void MKLDNNConcatNode::execute(mkldnn::stream strm) { +void Concat::execute(mkldnn::stream strm) { if (isOptimized()) { return; } - const 
MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory(); + const Memory& dst_memory = getChildEdgeAt(0)->getMemory(); if (canOptimizeNspc) { execNspcSpecCase(); return; @@ -512,15 +514,15 @@ void MKLDNNConcatNode::execute(mkldnn::stream strm) { (*prim).execute(strm, mem_ags); } -InferenceEngine::Precision MKLDNNConcatNode::getRuntimePrecision() const { +InferenceEngine::Precision Concat::getRuntimePrecision() const { return getMaxPrecision(getInputPrecisions()); } -void MKLDNNConcatNode::execNspcSpecCase() { - const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory(); +void Concat::execNspcSpecCase() { + const Memory& dst_memory = getChildEdgeAt(0)->getMemory(); const size_t num_src = getParentEdges().size(); uint8_t* dst_ptr = reinterpret_cast(dst_memory.GetData()); - const size_t dataSize = MKLDNNExtensionUtils::sizeOfDataType(dst_memory.GetDataType()); + const size_t dataSize = DnnlExtensionUtils::sizeOfDataType(dst_memory.GetDataType()); std::vector channelsDataSize; size_t channels_size = 0; @@ -530,7 +532,7 @@ void MKLDNNConcatNode::execNspcSpecCase() { size_t nonZeroInShapes = 0; int firstNonZeroEdge = -1; for (size_t i = 0; i < num_src; i++) { - const MKLDNNMemory& src_mem = getParentEdgesAtPort(i)[0]->getMemory(); + const Memory& src_mem = getParentEdgesAtPort(i)[0]->getMemory(); if (src_mem.GetShape().hasZeroDims()) { continue; } @@ -558,4 +560,6 @@ void MKLDNNConcatNode::execNspcSpecCase() { }); } -REG_MKLDNN_PRIM_FOR(MKLDNNConcatNode, Concatenation); \ No newline at end of file +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index 62ef337043b..a58d9f465eb 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -11,10 +11,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNConcatNode : public MKLDNNNode { +class Concat : public Node { public: - MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Concat(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -45,5 +46,6 @@ private: InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index ba7ca58c3a4..8c901d49954 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,9 +25,11 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { struct ConvKey { @@ -96,16 +98,16 @@ bool ConvKey::operator==(const ConvKey &rhs) const { } // namespace -class MKLDNNConvolutionNode::FusedSubgraph { +class Convolution::FusedSubgraph { public: - FusedSubgraph(const std::vector &opList, const MKLDNNConvolutionNode &conv, MKLDNNWeightsSharing::Ptr weightCache) { - _graph = std::unique_ptr(new MKLDNNGraph()); + FusedSubgraph(const std::vector &opList, const Convolution &conv, WeightsSharing::Ptr weightCache) { + _graph = std::unique_ptr(new Graph()); - std::unordered_set nodesSet; - std::vector edges; + 
std::unordered_set nodesSet; + std::vector edges; - auto addEdge = [&](const MKLDNNNodePtr& parent, const MKLDNNNodePtr& child, size_t parentPort, size_t childPort) -> void { - auto edge = std::make_shared(parent, child, parentPort, childPort); + auto addEdge = [&](const NodePtr& parent, const NodePtr& child, size_t parentPort, size_t childPort) -> void { + auto edge = std::make_shared(parent, child, parentPort, childPort); child->addEdge(edge); edges.push_back(edge); nodesSet.insert(parent); @@ -114,15 +116,15 @@ public: //Make inputs const auto &inpMemDesc1 = conv.getBaseMemDescAtOutputPort(0); - auto inp0 = std::make_shared(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache); + auto inp0 = std::make_shared(inpMemDesc1, "inp0", "Parameter", conv.getEngine(), weightCache); inputs.push_back(inp0); const size_t sumPortNum = conv.getParentEdges().size() - 1; const auto &inpMemDesc2 = conv.getBaseMemDescAtInputPort(sumPortNum); - auto inp1 = std::make_shared(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache); + auto inp1 = std::make_shared(inpMemDesc2, "inp1", "Parameter", conv.getEngine(), weightCache); inputs.push_back(inp1); - auto itr = std::find_if(opList.begin(), opList.end(), [](const MKLDNNNodePtr &node) { - if (auto eltwise = std::dynamic_pointer_cast(node)) { + auto itr = std::find_if(opList.begin(), opList.end(), [](const NodePtr &node) { + if (auto eltwise = std::dynamic_pointer_cast(node)) { return eltwise->isSpecialConvolutionAddFusing(); } return false; @@ -140,7 +142,7 @@ public: while (++itr != opList.end()) { auto parentNode = *parentItr; auto currentNode = *itr; - if (FakeQuantize == currentNode->getType()) { + if (Type::FakeQuantize == currentNode->getType()) { parentNode->addFusedNode(currentNode); } else { addEdge(parentNode, currentNode, 0, 0); @@ -157,29 +159,29 @@ public: //Make output const auto &outMemDesc = conv.getBaseMemDescAtOutputPort(0); - auto out = std::make_shared(outMemDesc, "out", "Result", conv.getEngine(), weightCache); + auto out = std::make_shared(outMemDesc, "out", "Result", conv.getEngine(), weightCache); addEdge(*parentItr, out, 0, 0); outputs.push_back(out); - std::vector nodes(nodesSet.begin(), nodesSet.end()); + std::vector nodes(nodesSet.begin(), nodesSet.end()); _graph->CreateGraph(nodes, edges, weightCache, "fused_subgraph"); } - std::shared_ptr getInput(size_t idx) const { + std::shared_ptr getInput(size_t idx) const { if (idx < inputs.size()) { return inputs[idx]; } else { - IE_THROW(OutOfBounds) << "Unexpected input index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx + IE_THROW(OutOfBounds) << "Unexpected input index in Convolution::fusedSubgraph::getInput idx=" << idx << " inputs.size()=" << inputs.size(); } } - std::shared_ptr getOutput(size_t idx) const { + std::shared_ptr getOutput(size_t idx) const { if (idx < outputs.size()) { return outputs[idx]; } else { - IE_THROW(OutOfBounds) << "Unexpected output index in MKLDNNConvolutionNode::fusedSubgraph::getInput idx=" << idx + IE_THROW(OutOfBounds) << "Unexpected output index in Convolution::fusedSubgraph::getInput idx=" << idx << " inputs.size()=" << outputs.size(); } } @@ -190,12 +192,12 @@ public: } private: - std::unique_ptr _graph; - std::vector> inputs; - std::vector> outputs; + std::unique_ptr _graph; + std::vector> inputs; + std::vector> outputs; }; -bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Convolution::isSupportedOperation(const std::shared_ptr& op, 
std::string& errorMessage) noexcept { try { if (!ngraph::is_type(op) && !ngraph::is_type(op)) { errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported"; @@ -217,8 +219,8 @@ bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), +Convolution::Convolution(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), groupNum(1lu), IC(1), groupIC(1), groupOC(1), eltwisePrecision(Precision::FP32) { std::string errorMessage; @@ -230,7 +232,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr auto groupConvolutionOp = ngraph::as_type_ptr(op); if (convolutionOp) { - algorithm = ConvolutionCommon; + algorithm = Algorithm::ConvolutionCommon; groupNum = 1; isGrouped = false; @@ -253,7 +255,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr paddingR = convolutionOp->get_pads_end(); autoPadding = one_of(convolutionOp->get_auto_pad(), ov::op::PadType::SAME_UPPER, ov::op::PadType::SAME_LOWER); } else if (groupConvolutionOp) { - algorithm = ConvolutionGrouped; + algorithm = Algorithm::ConvolutionGrouped; groupNum = groupConvolutionOp->input_value(1).get_shape()[0]; isGrouped = true; @@ -278,19 +280,19 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr } } -bool MKLDNNConvolutionNode::canBeExecutedInInt8() const { - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); +bool Convolution::canBeExecutedInInt8() const { + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); + auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); if (!weightsZeroPoints.empty()) weightsDataType = memory::data_type::s8; return one_of(inputDataType, memory::data_type::u8, memory::data_type::s8) && weightsDataType == memory::data_type::s8; } -InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const { +InferenceEngine::Precision Convolution::fusedEltwisePrecision(const NodePtr& fusingNode) const { InferenceEngine::Precision eltwisePrecision; int fusingPort = fusingNode->getFusingPort(); @@ -305,7 +307,7 @@ InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MK return eltwisePrecision; } -void MKLDNNConvolutionNode::getSupportedDescriptors() { +void Convolution::getSupportedDescriptors() { if (!descs.empty()) return; @@ -316,44 +318,44 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { // winograd support only constant weights and bias isWino = std::find(implPriorities.begin(), implPriorities.end(), impl_desc_type::jit_avx512_winograd) != implPriorities.end() && mkldnn::impl::cpu::x64::mayiuse(mkldnn::impl::cpu::x64::avx512_common) && !canBeExecutedInInt8() && - getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Input && - (withBiases ? 
(getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Input) : true); + getParentEdgeAt(1)->getParent()->isConstant() && getParentEdgeAt(1)->getParent()->getType() == Type::Input && + (withBiases ? (getParentEdgeAt(2)->getParent()->isConstant() && getParentEdgeAt(2)->getParent()->getType() == Type::Input) : true); } int expectedInputEdgesNum = static_cast(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { - if (fusedWith[i]->getType() == Convolution) { + if (fusedWith[i]->getType() == Type::Convolution) { expectedInputEdgesNum += static_cast(fusedWith[i]->getOriginalInputsNumber()) - 1; } - if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { - auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); + if (fusedWith[i]->getAlgorithm() == Algorithm::EltwiseAdd) { + auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { expectedInputEdgesNum++; } } } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); - eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + auto outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); + eltwisePrecision = DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType); if (!fusedWith.empty()) { - outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); - eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); + eltwisePrecision = DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType); } // We need to make sure that convolution output and second input of fused Eltwise operation // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32. 
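// [Editor's note: illustrative sketch, not part of the patch.] The check that follows enforces the
// invariant stated in the comment above: with an in-place "sum" post-op the convolution output and
// the second Eltwise input alias the same physical buffer, so their element sizes must be equal.
// Roughly, with hypothetical helper names:
//
//     if (sizeOf(outputPrecision) != sizeOf(eltwisePrecision))
//         outputPrecision = eltwisePrecision = Precision::FP32;   // widen both to a common 4-byte type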
if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) { for (int i = 0; i < fusedWith.size(); i++) { - if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { - auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); + if (fusedWith[i]->getAlgorithm() == Algorithm::EltwiseAdd) { + auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { eltwisePrecision = fusedEltwisePrecision(fusedWith[i]); - if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) { + if (DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) { eltwisePrecision = Precision::FP32; outputDataType = memory::data_type::f32; } @@ -371,13 +373,13 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { int ndims = getInputShapeAtPort(0).getRank(); - withDWConv = isFusedWith(Convolution); + withDWConv = isFusedWith(Type::Convolution); if (withDWConv && isDynamicNode()) { IE_THROW() << "DW convolution is fused into convolution node " << getName() << " with dynamic shape."; } for (int i = 0; i < fusedWith.size(); i++) { - auto *convolutionNode = dynamic_cast(fusedWith[i].get()); + auto *convolutionNode = dynamic_cast(fusedWith[i].get()); if (convolutionNode) { auto& inActivationDims = convolutionNode->inputShapes[0].getStaticDims(); dw_conv_ih = inActivationDims[convolutionNode->inputShapes[0].getRank() - 2]; @@ -393,9 +395,9 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (canBeExecutedInInt8()) { if (i == 0) { - dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); + dw_conv_in_dt = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); } else { - dw_conv_in_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[i - 1]->getOriginalOutputPrecisionAtPort(0)); + dw_conv_in_dt = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[i - 1]->getOriginalOutputPrecisionAtPort(0)); } } else { dw_conv_in_dt = memory::data_type::f32; @@ -433,8 +435,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { && !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32; eltwisePrecision = Precision::FP32; for (int i = 0; i < fusedWith.size(); i++) { - if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { - auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); + if (fusedWith[i]->getAlgorithm() == Algorithm::EltwiseAdd) { + auto* eltwiseNode = dynamic_cast(fusedWith[i].get()); if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { eltwisePrecision = fusedEltwisePrecision(fusedWith[i]); // TODO(amalyshe): there might be situation when convolution can be executed in BF16, @@ -445,7 +447,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { // bofore the fused convolution. This behaviour might be more correct regarding expected markup // of the graph but performance of first and second approaches might be different. Need to verify outputDataType = eltwisePrecision == Precision::BF16 ? 
memory::data_type::bf16 : memory::data_type::f32; - eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + eltwisePrecision = DnnlExtensionUtils::DataTypeToIEPrecision(outputDataType); } } } @@ -497,7 +499,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } -void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) { +void Convolution::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) { mkldnn::post_ops ops; const bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed @@ -511,17 +513,17 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto }; for (auto &node : fusedWith) { - if (node->getType() == Split || node->getType() == Concatenation) + if (node->getType() == Type::Split || node->getType() == Type::Concatenation) continue; - if (auto* eltwiseNode = dynamic_cast(node.get())) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { if (eltwiseNode->isSpecialConvolutionAddFusing()) { if (withSumBroadcast) { break; } - ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision)); + ops.append_sum(1.0, DnnlExtensionUtils::IEPrecisionToDataType(eltwisePrecision)); } else { - if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + if (useLegacyPostOps || eltwiseNode->getOneDnnAlgorithm() != mkldnn::algorithm::undef) { eltwiseNode->appendPostOps(ops, dims, postOpsArgs); } else { eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs); @@ -530,7 +532,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto continue; } - if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { if (useLegacyPostOps) { fakeQuantizeNode->appendPostOps(ops, dims, postOpsArgs); } else { @@ -539,7 +541,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto continue; } - auto* convolutionNode = dynamic_cast(node.get()); + auto* convolutionNode = dynamic_cast(node.get()); if (convolutionNode) { if (initWeights) { postOpsArgs.push_back(getParentEdgeAt(getOriginalInputsNumber() + 0)->getMemoryPtr()); @@ -564,11 +566,11 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto attr.set_post_ops(ops); } -void MKLDNNConvolutionNode::selectOptimalPrimitiveDescriptor() { +void Convolution::selectOptimalPrimitiveDescriptor() { selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true); } -void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { +void Convolution::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -603,7 +605,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { } if (withDWConv) { - auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); + auto weightsPrc = DnnlExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); @@ -653,8 +655,8 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNConvolutionNode::created() const { - return getType() == Convolution; +bool Convolution::created() const { + return getType() == Type::Convolution; } namespace { @@ -693,7 +695,7 @@ createDescriptorInternal(const mkldnn::memory::desc& inputDesc, } } // namespace -void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, +void Convolution::createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) { MemoryDescPtr inpDesc; if (inputDesc[0]->isDefined()) { @@ -723,12 +725,12 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector& i wdt = memory::data_type::s8; } - mkldnn::memory::desc weightDnnlDesc(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), wdt, memory::format_tag::any); + mkldnn::memory::desc weightDnnlDesc(DnnlExtensionUtils::convertToDnnlDims(weightDims), wdt, memory::format_tag::any); mkldnn::memory::desc biasDnnlDesc; if (withBiases) { memory::data_type bdt = memory::data_type::f32; - biasDnnlDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims), bdt, memory::format_tag::any); + biasDnnlDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(biasesDims), bdt, memory::format_tag::any); } std::vector algorithms; @@ -744,12 +746,12 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector& i } } -void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) { +void Convolution::addZeroPoints(mkldnn::primitive_attr& attr) { if (!inputZeroPoints.empty()) { attr.set_input_zero_points(inputZeroPoints.size(), 1 << 1 /*through C dim*/); if (!inputZeroPointsMemPtr) { - inputZeroPointsMemPtr.reset(new MKLDNNMemory(getEngine())); + inputZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::U8, {inputZeroPoints.size()}); inputZeroPointsMemPtr->Create(memoryDesc, inputZeroPoints.data()); } @@ -759,7 +761,7 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) { attr.set_weights_zero_points(weightsZeroPoints.size(), 1 << 1 /*through C dim*/); if (!weightsZeroPointsMemPtr) { - weightsZeroPointsMemPtr.reset(new MKLDNNMemory(getEngine())); + weightsZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {weightsZeroPoints.size()}); weightsZeroPointsMemPtr->Create(memoryDesc, weightsZeroPoints.data()); } @@ -769,14 +771,14 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) { attr.set_output_compensations(outputCompensation.size(), 1 << 1 /*through C dim*/); if (!outputCompensationMemPtr) { - outputCompensationMemPtr.reset(new MKLDNNMemory(getEngine())); + outputCompensationMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {outputCompensation.size()}); outputCompensationMemPtr->Create(memoryDesc, outputCompensation.data()); } } } -void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { +void Convolution::initDescriptor(const NodeConfig& config) { auto *selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; @@ -823,7 +825,7 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { } if (withDWConv) { - auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
Precision::I8 : Precision::FP32); + auto weightsPrc = DnnlExtensionUtils::IEPrecisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); auto biasPrc = memory::data_type::f32; std::vector dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); @@ -877,13 +879,13 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { selectedPD->setConfig(rightConfig); } -void MKLDNNConvolutionNode::filterSupportedPrimitiveDescriptors() { - MKLDNNNode::filterSupportedPrimitiveDescriptors(); +void Convolution::filterSupportedPrimitiveDescriptors() { + Node::filterSupportedPrimitiveDescriptors(); // We also need to filter descs in Convolution node filterSupportedDescriptors(); } -void MKLDNNConvolutionNode::filterSupportedDescriptors() { +void Convolution::filterSupportedDescriptors() { if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) { if (inputMemoryFormatsFilter.size() > 1 || outputMemoryFormatsFilter.size() > 1) { IE_THROW() << "Incorrect number of input or output memory formats for Convolution node"; @@ -892,11 +894,11 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() { while (itd != descs.end()) { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { - auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); + auto src_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } if (!outputMemoryFormatsFilter.empty()) { - auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); + auto dst_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } if (!isSuitableDesc) { @@ -908,7 +910,7 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() { } } -bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const { +bool Convolution::isPossibleToSkipInitConfig(DnnlDesriptor &desc) const { // WA: In some cases, we can predict in advance the type of primitive that will be called in the future. // In particular, isPossibleToSkipInitConfig() checks whether we can skip the creation of primitives with // gemm implementation, which significantly increase the network load time. @@ -931,8 +933,8 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c isPossibleJitPlanar = false; std::shared_ptr convDesc(desc); - auto srcMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.src_desc); - auto dstMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.dst_desc); + auto srcMemDesc = DnnlExtensionUtils::makeDescriptor(convDesc->data.src_desc); + auto dstMemDesc = DnnlExtensionUtils::makeDescriptor(convDesc->data.dst_desc); auto srcDataType = convDesc->data.src_desc.data_type; auto dstDataType = convDesc->data.dst_desc.data_type; bool isPlanarFloatConv = srcMemDesc->hasLayoutType(LayoutType::ncsp) @@ -943,51 +945,51 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c return !isPossibleJitPlanar && isPlanarFloatConv; } -std::shared_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr Convolution::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = idx > 0 ? 
primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx); if (getInputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(desc); + return DnnlExtensionUtils::makeDescriptor(desc); } -bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { +bool Convolution::canFuse(const NodePtr& node) const { return canFuseSimpleOperation(node); } -mkldnn::memory MKLDNNConvolutionNode::getWeights() const { +mkldnn::memory Convolution::getWeights() const { return getParentEdgeAt(1)->getMemory().GetPrimitive(); } -void MKLDNNConvolutionNode::setDynamicBatchLim(int lim) { +void Convolution::setDynamicBatchLim(int lim) { if (!execPtr) { IE_THROW() << "Can't set dynamic batch for Convolution node with name: " << getName() << ", because executor is not compiled"; } if (execPtr->needReordering()) { IE_THROW() << "Can't execute Convolution node with dynamic batch via executor with reorders"; } - MKLDNNNode::setDynamicBatchLim(lim); + Node::setDynamicBatchLim(lim); } -mkldnn::memory MKLDNNConvolutionNode::getBias() const { +mkldnn::memory Convolution::getBias() const { return getParentEdgeAt(2)->getMemory().GetPrimitive(); } -InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { +InferenceEngine::Precision Convolution::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account size_t inputsNumLimit = 2; for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) { auto parentEdge = getParentEdgeAt(i); - if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) { - inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); } } return getMaxPrecision(inputPrecisions); } -bool MKLDNNConvolutionNode::isNspcAvailable() const { +bool Convolution::isNspcAvailable() const { using impl::cpu::x64::mayiuse; // do not use in non-quantized networks until it is enforced externally @@ -1062,8 +1064,8 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const { return true; } -InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) { - const auto constNode = std::dynamic_pointer_cast(getParentEdgeAt(edgeNum)->getParent()); +InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped) { + const auto constNode = std::dynamic_pointer_cast(getParentEdgeAt(edgeNum)->getParent()); if (!constNode) { IE_THROW() << "Cannot cast " << edgeNum << " input to Input node for " << getName() << "."; } @@ -1084,14 +1086,14 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn cpu_convert(blb->GetPtr(), internalBlob->buffer(), - MKLDNNExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), internalBlob->getTensorDesc().getPrecision(), elementsCount); return internalBlob; } -void MKLDNNConvolutionNode::prepareParams() { +void Convolution::prepareParams() { auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); auto wghMemPtr = 
getParentEdgesAtPort(1)[0]->getMemoryPtr(); auto dstMemPtr = getOutputMemory(); @@ -1101,7 +1103,7 @@ void MKLDNNConvolutionNode::prepareParams() { IE_THROW() << "Input memory was not allocated."; if (!wghMemPtr || !wghMemPtr->isAllocated()) IE_THROW() << "Weight memory was not allocated."; - MKLDNNMemoryPtr biasMemPtr = nullptr; + MemoryPtr biasMemPtr = nullptr; if (withBiases) { biasMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr(); if (!biasMemPtr || !biasMemPtr->isAllocated()) @@ -1161,27 +1163,27 @@ void MKLDNNConvolutionNode::prepareParams() { const std::vector& dilation, const std::vector& paddingL, const std::vector& paddingR, - mkldnn::algorithm alg) -> std::shared_ptr { + mkldnn::algorithm alg) -> std::shared_ptr { mkldnn::memory::desc dnnlBiasDesc; if (biasDescPtr) { // WA to align IR bias representation (3 to 5 rank tensors) to oneDNN representation (1 rank tensor) dnnlBiasDesc = biasDescPtr->getDnnlDesc().reshape({dstDesc.dims()[1]}); } - return std::make_shared(createDescriptorInternal(srcDesc, - wghDesc, - dnnlBiasDesc, - dstDesc, - (biasDescPtr != nullptr), - stride, - dilation, - paddingL, - paddingR, - alg)); + return std::make_shared(createDescriptorInternal(srcDesc, + wghDesc, + dnnlBiasDesc, + dstDesc, + (biasDescPtr != nullptr), + stride, + dilation, + paddingL, + paddingR, + alg)); }; const auto alg = (key.implType & impl_desc_type::winograd) ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct; - std::shared_ptr desc = createMkldnnConvDesc(key.inp0->getDnnlDesc(), + std::shared_ptr desc = createMkldnnConvDesc(key.inp0->getDnnlDesc(), key.inp1->getDnnlDesc(), key.out->getDnnlDesc(), key.bias, @@ -1213,25 +1215,25 @@ void MKLDNNConvolutionNode::prepareParams() { } if (!execPtr) { - auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(key.inp0->getShape().getStaticDims()), + auto inDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp0->getShape().getStaticDims()), key.inp0->getDataType(), memory::format_tag::any); - auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(key.inp1->getShape().getStaticDims()), + auto wghDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.inp1->getShape().getStaticDims()), key.inp1->getDataType(), memory::format_tag::any); - auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(key.out->getShape().getStaticDims()), + auto outDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(key.out->getShape().getStaticDims()), key.out->getDataType(), memory::format_tag::any); - std::shared_ptr reorderConvDesc = createMkldnnConvDesc(inDesc, - wghDesc, - outDesc, - key.bias, - key.stride, - key.dilation, - key.paddingL, - key.paddingR, - mkldnn::algorithm::convolution_direct); + std::shared_ptr reorderConvDesc = createMkldnnConvDesc(inDesc, + wghDesc, + outDesc, + key.bias, + key.stride, + key.dilation, + key.paddingL, + key.paddingR, + mkldnn::algorithm::convolution_direct); auto reordItpd = reorderConvDesc->createPrimitiveDescriptorIterator(engine, key.attr); if (static_cast(reordItpd)) { @@ -1263,13 +1265,13 @@ void MKLDNNConvolutionNode::prepareParams() { } appendZeroPointsArgs(); - MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); + Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); } else { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } } -MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const 
mkldnn::convolution_forward::primitive_desc& pd, +Convolution::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::convolution_forward::primitive_desc& pd, const mkldnn::memory::desc& inMemDesc, const mkldnn::memory::desc& weightMemDesc, const mkldnn::memory::desc& outMemDesc, @@ -1289,14 +1291,14 @@ MKLDNNConvolutionNode::ConvolutionExecutor::ConvolutionExecutor(const mkldnn::co } } -void MKLDNNConvolutionNode::execute(mkldnn::stream strm) { +void Convolution::execute(mkldnn::stream strm) { if (!execPtr) { IE_THROW() << "Can't execute Convolution node with name: " << getName() << ", because executor is not compiled"; } execPtr->exec(primArgs, strm); } -void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) { +void Convolution::executeDynamicImpl(mkldnn::stream strm) { execute(strm); if (withSumBroadcast) { if (!subgraph) { @@ -1317,7 +1319,7 @@ void MKLDNNConvolutionNode::executeDynamicImpl(mkldnn::stream strm) { } } -void MKLDNNConvolutionNode::updatePadding() { +void Convolution::updatePadding() { //update padding. if (isDynamicNode() && autoPadding) { paddingL = shapeInference->get_pads_begin(); @@ -1325,7 +1327,7 @@ void MKLDNNConvolutionNode::updatePadding() { } } -void MKLDNNConvolutionNode::redefineOutputMemory(const std::vector &newOutputShapes) { +void Convolution::redefineOutputMemory(const std::vector &newOutputShapes) { if (withSum) { const size_t sumPortNum = getParentEdges().size() - 1; const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory(); @@ -1345,17 +1347,17 @@ void MKLDNNConvolutionNode::redefineOutputMemory(const std::vector & withSumBroadcast = false; } } - MKLDNNNode::redefineOutputMemory(newOutputShapes); + Node::redefineOutputMemory(newOutputShapes); } -MemoryDescPtr MKLDNNConvolutionNode::getSumMemDesc(primitive_desc_iterator &primitive_desc_it) { +MemoryDescPtr Convolution::getSumMemDesc(primitive_desc_iterator &primitive_desc_it) { if (getOutputShapeAtPort(0).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(0), getInputShapeAtPort(getParentEdges().size() - 1)); + return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.dst_desc(0), getInputShapeAtPort(getParentEdges().size() - 1)); } - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(0)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(0)); } -MKLDNNMemoryPtr MKLDNNConvolutionNode::getOutputMemory() const { +MemoryPtr Convolution::getOutputMemory() const { if (withSumBroadcast) { if (!subgraph) { IE_THROW(Unexpected) << "Fused ops subgraph has not been created in " << getTypeStr() << " with name " << getName(); @@ -1367,10 +1369,10 @@ MKLDNNMemoryPtr MKLDNNConvolutionNode::getOutputMemory() const { } } -void MKLDNNConvolutionNode::addFusedNode(const MKLDNNNodePtr &fusingNode) { - if (Eltwise == fusingNode->getType()) { - if (fusingNode->getAlgorithm() == EltwiseAdd) { - auto eltwiseNode = std::dynamic_pointer_cast(fusingNode); +void Convolution::addFusedNode(const NodePtr &fusingNode) { + if (Type::Eltwise == fusingNode->getType()) { + if (fusingNode->getAlgorithm() == Algorithm::EltwiseAdd) { + auto eltwiseNode = std::dynamic_pointer_cast(fusingNode); if (eltwiseNode && eltwiseNode->isSpecialConvolutionAddFusing()) { withSum = true; } @@ -1385,10 +1387,10 @@ void MKLDNNConvolutionNode::addFusedNode(const MKLDNNNodePtr &fusingNode) { } } } - MKLDNNNode::addFusedNode(fusingNode); + Node::addFusedNode(fusingNode); } -void MKLDNNConvolutionNode::appendZeroPointsArgs() { +void 
Convolution::appendZeroPointsArgs() { if (inputZeroPointsMemPtr != nullptr) { primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC] = inputZeroPointsMemPtr->GetPrimitive(); } @@ -1399,4 +1401,7 @@ void MKLDNNConvolutionNode::appendZeroPointsArgs() { primArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_DST] = outputCompensationMemPtr->GetPrimitive(); } } -REG_MKLDNN_PRIM_FOR(MKLDNNConvolutionNode, Convolution); + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/conv.h b/src/plugins/intel_cpu/src/nodes/conv.h index 16ec521477f..a4fa9fa4c4e 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.h +++ b/src/plugins/intel_cpu/src/nodes/conv.h @@ -13,12 +13,13 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNEltwiseNode; +class Eltwise; -class MKLDNNConvolutionNode : public MKLDNNNode { +class Convolution : public Node { public: - MKLDNNConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Convolution(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -38,7 +39,7 @@ public: mkldnn::memory getWeights() const; mkldnn::memory getBias() const; - size_t descInputNumbers(MKLDNNDescriptor desc) override { + size_t descInputNumbers(DnnlDesriptor desc) override { return getOriginalInputsNumber(); } @@ -55,7 +56,7 @@ public: const std::vector &getPaddingL() { return paddingL; } const std::vector &getPaddingR() { return paddingR; } - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; bool isDepthWise() const { return isGrouped && 1 == groupOC && 1 == groupIC; } @@ -65,9 +66,9 @@ public: void setDynamicBatchLim(int lim) override; protected: - InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const; + InferenceEngine::Precision fusedEltwisePrecision(const NodePtr& fusingNode) const; void redefineOutputMemory(const std::vector &newOutputShapes) override; - void addFusedNode(const MKLDNNNodePtr &fusingNode) override; + void addFusedNode(const NodePtr &fusingNode) override; private: class FusedSubgraph; @@ -91,13 +92,13 @@ private: void addZeroPoints(mkldnn::primitive_attr& attr); void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights); void filterSupportedDescriptors(); - bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const; + bool isPossibleToSkipInitConfig(DnnlDesriptor &desc) const; bool isNspcAvailable() const; InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, size_t edgeNum, bool isGrouped = false); void updatePadding(); MemoryDescPtr getSumMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it); - MKLDNNMemoryPtr getOutputMemory() const; + MemoryPtr getOutputMemory() const; void appendZeroPointsArgs(); @@ -135,12 +136,13 @@ private: AttrPtr pAttr; bool autoPadding = false; FusedSubgraphPtr subgraph; - std::unordered_map> fusedConstNodes; + std::unordered_map> fusedConstNodes; - MKLDNNMemoryPtr inputZeroPointsMemPtr; - MKLDNNMemoryPtr weightsZeroPointsMemPtr; - MKLDNNMemoryPtr outputCompensationMemPtr; + MemoryPtr inputZeroPointsMemPtr; + MemoryPtr weightsZeroPointsMemPtr; + MemoryPtr outputCompensationMemPtr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp 
b/src/plugins/intel_cpu/src/nodes/convert.cpp index 72f6af480f7..3012d334040 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/convert.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include #include "convert.h" #include "common/cpu_convert.h" #include "common/blocked_desc_creator.h" @@ -11,10 +11,13 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Convert::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto convert = std::dynamic_pointer_cast(op); if (!convert) { @@ -27,8 +30,8 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Convert::Convert(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Convert node with name '" + getName() + "'"; @@ -40,13 +43,13 @@ MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, co origPrc = details::convertPrecision(convert->get_destination_type()); } -std::vector MKLDNNConvertNode::shapeInfer() const { +std::vector Convert::shapeInfer() const { return std::vector{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()}; } -MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, - const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode("Convert", nodeName, eng, cache) +Convert::Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, + const std::string &nodeName, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node("Convert", nodeName, eng, cache) , origPrc(outPrc) { inputShapes.push_back(shape); addOriginalInputPrecision(inPrc); @@ -58,7 +61,7 @@ MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine:: errorPrefix = "Convert node with name '" + getName() + "'"; } -void MKLDNNConvertNode::getSupportedDescriptors() { +void Convert::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data // from correspond tensor descriptors. 
if (outputShapes.empty()) @@ -71,14 +74,14 @@ void MKLDNNConvertNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << " has incorrect number of output edges"; } -bool MKLDNNConvertNode::isSupportedDesc(const MemoryDesc &desc) { +bool Convert::isSupportedDesc(const MemoryDesc &desc) { bool isSupported = desc.getType() & MemoryDescType::Blocked; if (desc.getType() == MemoryDescType::DnnlBlocked) isSupported &= desc.as()->hasEmptyExtraData(); return isSupported; } -void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { +void Convert::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -129,11 +132,11 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNConvertNode::executeDynamicImpl(mkldnn::stream strm) { +void Convert::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNConvertNode::execute(mkldnn::stream strm) { +void Convert::execute(mkldnn::stream strm) { auto& parentMem = getParentEdgeAt(0)->getMemory(); auto& childMem = getChildEdgeAt(0)->getMemory(); @@ -154,8 +157,10 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { parentPaddElemCount); } -bool MKLDNNConvertNode::created() const { - return getType() == Convert; +bool Convert::created() const { + return getType() == Type::Convert; } -REG_MKLDNN_PRIM_FOR(MKLDNNConvertNode, Convert); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/convert.h b/src/plugins/intel_cpu/src/nodes/convert.h index d725b26fee3..4dafb06de9e 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.h +++ b/src/plugins/intel_cpu/src/nodes/convert.h @@ -11,12 +11,13 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNConvertNode : public MKLDNNNode { +class Convert : public Node { public: - MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, - const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Convert(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + Convert(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, + const std::string &nodeName, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -29,7 +30,7 @@ public: // This is the interface extension designed to provide inp and output tensor descriptors without the CNNLayer. // In that case the Convert node is instantiated with default CNNLayer and inp/out tensor descriptors are set via this method. - // This is useful if the Convert node is added to the graph as an auxiliary operation at the MKLDNNGraph + // This is useful if the Convert node is added to the graph as an auxiliary operation at the Graph // initialization stage. 
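// [Editor's note: illustrative sketch, not part of the patch.] The auxiliary-construction path the
// comment above describes would, under the renamed API, look roughly like this (the descriptor and
// engine/cache variables are hypothetical; the constructor and setDescs signatures are the ones
// declared in this header):
//
//     auto cvt = std::make_shared<node::Convert>(shape, inPrc, outPrc, "aux_convert", engine, cache);
//     cvt->setDescs(inputMemDesc, outputMemDesc);   // descriptors supplied directly, no ngraph op needed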
void setDescs(const MemoryDesc& input, const MemoryDesc& output) { this->input = input.clone(); @@ -54,5 +55,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp index e4873a4fae4..0f52e27e358 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp @@ -9,10 +9,13 @@ #include "ie_parallel.hpp" #include "ctc_greedy_decoder.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool CTCGreedyDecoder::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto greedyDecOp = ngraph::as_type_ptr(op); if (!greedyDecOp) { @@ -25,8 +28,8 @@ bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +CTCGreedyDecoder::CTCGreedyDecoder(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -48,7 +51,7 @@ MKLDNNCTCGreedyDecoderNode::MKLDNNCTCGreedyDecoderNode(const std::shared_ptrget_ctc_merge_repeated(); } -void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() { +void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -66,7 +69,7 @@ void MKLDNNCTCGreedyDecoderNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) { +void CTCGreedyDecoder::execute(mkldnn::stream strm) { const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr()); const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); @@ -161,16 +164,18 @@ void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) { }); } -bool MKLDNNCTCGreedyDecoderNode::created() const { - return getType() == CTCGreedyDecoder; +bool CTCGreedyDecoder::created() const { + return getType() == Type::CTCGreedyDecoder; } -void MKLDNNCTCGreedyDecoderNode::executeDynamicImpl(mkldnn::stream strm) { +void CTCGreedyDecoder::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNCTCGreedyDecoderNode::needPrepareParams() const { +bool CTCGreedyDecoder::needPrepareParams() const { return false; } -REG_MKLDNN_PRIM_FOR(MKLDNNCTCGreedyDecoderNode, CTCGreedyDecoder) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h index 92851cf9c1b..4be3177fb05 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNCTCGreedyDecoderNode : public MKLDNNNode { +class CTCGreedyDecoder : public Node { public: - MKLDNNCTCGreedyDecoderNode(const std::shared_ptr& op, const mkldnn::engine& eng, 
MKLDNNWeightsSharing::Ptr &cache); + CTCGreedyDecoder(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -30,5 +31,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp index 99c1092232e..c543df3fb58 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp @@ -9,10 +9,13 @@ #include "ie_parallel.hpp" #include "ctc_greedy_decoder_seq_len.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool CTCGreedyDecoderSeqLen::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto greedyDecOp = ngraph::as_type_ptr(op); if (!greedyDecOp) { @@ -25,8 +28,8 @@ bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_pt return true; } -MKLDNNCTCGreedyDecoderSeqLenNode::MKLDNNCTCGreedyDecoderSeqLenNode(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +CTCGreedyDecoderSeqLen::CTCGreedyDecoderSeqLen(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -47,7 +50,7 @@ MKLDNNCTCGreedyDecoderSeqLenNode::MKLDNNCTCGreedyDecoderSeqLenNode(const std::sh mergeRepeated = greedyDecOp->get_merge_repeated(); } -void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() { +void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -71,7 +74,7 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { +void CTCGreedyDecoderSeqLen::execute(mkldnn::stream strm) { const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr()); const int* sequenceLengths = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); @@ -164,16 +167,18 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { }); } -bool MKLDNNCTCGreedyDecoderSeqLenNode::created() const { - return getType() == CTCGreedyDecoderSeqLen; +bool CTCGreedyDecoderSeqLen::created() const { + return getType() == Type::CTCGreedyDecoderSeqLen; } -void MKLDNNCTCGreedyDecoderSeqLenNode::executeDynamicImpl(mkldnn::stream strm) { +void CTCGreedyDecoderSeqLen::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNCTCGreedyDecoderSeqLenNode::needPrepareParams() const { +bool CTCGreedyDecoderSeqLen::needPrepareParams() const { return false; } -REG_MKLDNN_PRIM_FOR(MKLDNNCTCGreedyDecoderSeqLenNode, CTCGreedyDecoderSeqLen) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h 
b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h index 5a391fa2212..0f058138e9f 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNCTCGreedyDecoderSeqLenNode : public MKLDNNNode { +class CTCGreedyDecoderSeqLen : public Node { public: - MKLDNNCTCGreedyDecoderSeqLenNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + CTCGreedyDecoderSeqLen(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -34,5 +35,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp index f1e88fbeb4b..e9aaa110b69 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp @@ -8,10 +8,13 @@ #include "ie_parallel.hpp" #include "ctc_loss.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool CTCLoss::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto ctcLossOp = ngraph::as_type_ptr(op); if (!ctcLossOp) { @@ -24,8 +27,8 @@ bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +CTCLoss::CTCLoss(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -42,7 +45,7 @@ MKLDNNCTCLossNode::MKLDNNCTCLossNode(const std::shared_ptr& op, co unique = ctcLossOp->get_unique(); } -void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() { +void CTCLoss::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -57,11 +60,11 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNCTCLossNode::executeDynamicImpl(mkldnn::stream strm) { +void CTCLoss::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNCTCLossNode::execute(mkldnn::stream strm) { +void CTCLoss::execute(mkldnn::stream strm) { StatusCode returnCode = OK; const float* logits = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -277,8 +280,10 @@ void MKLDNNCTCLossNode::execute(mkldnn::stream strm) { parallel_nt(0, threadBody_3); } -bool MKLDNNCTCLossNode::created() const { - return getType() == CTCLoss; +bool CTCLoss::created() const { + return getType() == Type::CTCLoss; } -REG_MKLDNN_PRIM_FOR(MKLDNNCTCLossNode, CTCLoss) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.h b/src/plugins/intel_cpu/src/nodes/ctc_loss.h index e41475b7e55..98a634c2c60 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.h +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNCTCLossNode : public MKLDNNNode { +class CTCLoss : public Node { public: - 
MKLDNNCTCLossNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + CTCLoss(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -32,5 +33,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp index b6f2df65a21..03c495857f9 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp @@ -1,7 +1,6 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include #include @@ -14,10 +13,13 @@ #include "cum_sum.h" #include "utils/bfloat16.hpp" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNCumSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool CumSum::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto cumsum = std::dynamic_pointer_cast(op); if (!cumsum) { @@ -30,8 +32,8 @@ bool MKLDNNCumSumNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +CumSum::CumSum(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -66,7 +68,7 @@ MKLDNNCumSumNode::MKLDNNCumSumNode(const std::shared_ptr& op, cons IE_THROW() << errorPrefix << " has different 'data' input and output dimensions"; } -void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { +void CumSum::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -91,7 +93,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNCumSumNode::execute(mkldnn::stream strm) { +void CumSum::execute(mkldnn::stream strm) { if (inputShapes.size() == numOfInputs) axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); @@ -107,7 +109,7 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) { } template -void MKLDNNCumSumNode::exec() { +void CumSum::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); const VectorDims strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType()->getStrides(); @@ -128,7 +130,7 @@ void MKLDNNCumSumNode::exec() { } template -void MKLDNNCumSumNode::cumSum(const dataType *input, dataType *output, const VectorDims &strides) { +void CumSum::cumSum(const dataType *input, dataType *output, const VectorDims &strides) { SizeVector iterationRange(numOfDims - 1); size_t j = 0; const auto &shape = getParentEdgesAtPort(CUM_SUM_DATA)[0]->getMemory().getStaticDims(); @@ -192,7 +194,7 @@ void MKLDNNCumSumNode::cumSum(const dataType *input, dataType *output, const Vec }); } -void MKLDNNCumSumNode::parallelItInit(size_t start, std::vector& counters, const std::vector& iterationRange) { +void CumSum::parallelItInit(size_t start, std::vector& counters, const std::vector& iterationRange) { auto itCounter = 
counters.rbegin(); auto itWork = iterationRange.rbegin(); while (itCounter != counters.rend() && itWork != iterationRange.rend()) { @@ -203,7 +205,7 @@ void MKLDNNCumSumNode::parallelItInit(size_t start, std::vector& counter } } -inline void MKLDNNCumSumNode::parallelItStep(std::vector& counters, const std::vector& iterationRange) { +inline void CumSum::parallelItStep(std::vector& counters, const std::vector& iterationRange) { auto itCounter = counters.rbegin(); auto itWork = iterationRange.rbegin(); @@ -217,7 +219,7 @@ inline void MKLDNNCumSumNode::parallelItStep(std::vector& counters, cons } } -inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const { +inline size_t CumSum::getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const { size_t startOffset = 0; for (size_t idx = 0; idx < forStartOffset.size(); ++idx) { startOffset += forStartOffset[idx] * strides[idx]; @@ -225,7 +227,7 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forSta return startOffset; } -size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const { +size_t CumSum::getAxis(const Memory& _axis, const Memory& _data) const { const auto& axisPrecision = _axis.getDesc().getPrecision(); const int64_t dataShapeSize = static_cast(_data.GetShape().getRank()); int64_t axisValueFromBlob = 0; @@ -249,16 +251,18 @@ size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize); } -bool MKLDNNCumSumNode::created() const { - return getType() == CumSum; +bool CumSum::created() const { + return getType() == Type::CumSum; } -bool MKLDNNCumSumNode::needPrepareParams() const { +bool CumSum::needPrepareParams() const { return false; } -void MKLDNNCumSumNode::executeDynamicImpl(mkldnn::stream strm) { +void CumSum::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -REG_MKLDNN_PRIM_FOR(MKLDNNCumSumNode, CumSum) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.h b/src/plugins/intel_cpu/src/nodes/cum_sum.h index 73c7060819f..caf8a656879 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.h +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNCumSumNode : public MKLDNNNode { +class CumSum : public Node { public: - MKLDNNCumSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + CumSum(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -37,7 +38,7 @@ private: inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const; - size_t getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const; + size_t getAxis(const Memory& _axis, const Memory& _data) const; enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; @@ -50,11 +51,12 @@ private: template struct CumSumExecute { - void operator()(MKLDNNCumSumNode* node) { + void operator()(CumSum* node) { node->exec(); } }; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index adb012da93a..a6fc40bc33a 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ 
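
The CumSum hunks above also show the second recurring change: node-type checks move from bare enumerator names to the scoped `Type::` form, and the per-node `REG_MKLDNN_PRIM_FOR` registration lines disappear in favor of plain closing-namespace comments. Pulled out of the diff context, the resulting boilerplate for a node that needs no prepared parameters reads like this minimal sketch (assuming `Type` is the node-type enum returned by `getType()`, as the hunks indicate):

    bool CumSum::created() const {
        // Compare against the scoped enumerator instead of the old unscoped name.
        return getType() == Type::CumSum;
    }

    bool CumSum::needPrepareParams() const {
        // Shapes are read directly at execution time, so nothing is cached here.
        return false;
    }

    void CumSum::executeDynamicImpl(mkldnn::stream strm) {
        // Dynamic-shape execution reuses the static path.
        execute(strm);
    }
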
b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "ie_parallel.hpp" #include "utils/general_utils.h" #include @@ -26,10 +26,13 @@ #include "convolution_shape_inference.hpp" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Deconvolution::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (std::dynamic_pointer_cast(op) == nullptr && std::dynamic_pointer_cast(op) == nullptr) { @@ -51,10 +54,10 @@ bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +Deconvolution::Deconvolution(const std::shared_ptr& op, + const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : Node(op, eng, cache) { internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> DnnlMemoryDescPtr { - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0)); }); std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { @@ -65,7 +68,7 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptrget_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); } else if (groupConvBackprop) { - algorithm = DeconvolutionGrouped; + algorithm = Algorithm::DeconvolutionGrouped; groupNum = weightDims[0]; IC = groupNum * weightDims[1]; @@ -130,8 +133,8 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr(); } -InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceEngine::SizeVector dims) { - auto constNode = std::dynamic_pointer_cast(getParentEdgeAt(1)->getParent()); +InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::SizeVector dims) { + auto constNode = std::dynamic_pointer_cast(getParentEdgeAt(1)->getParent()); if (!constNode) IE_THROW() << "Cannot cast const input node for node " << getName() << "."; auto blb = constNode->getMemoryPtr(); @@ -154,7 +157,7 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE orderForBlockedDesc.push_back(i); BlockingDesc blkDesc(dimsForBlockedDesc, orderForBlockedDesc); - InferenceEngine::TensorDesc tensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), dims, blkDesc); + InferenceEngine::TensorDesc tensorDesc(DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), dims, blkDesc); Blob::Ptr internalBlob = InferenceEngine::make_shared_blob(tensorDesc); internalBlob->allocate(); @@ -172,8 +175,8 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE return internalBlob; } -bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { - if (std::dynamic_pointer_cast(getParentEdgeAt(1)->getParent()) == nullptr) { +bool Deconvolution::canBeExecutedInInt8() const { + if (std::dynamic_pointer_cast(getParentEdgeAt(1)->getParent()) == nullptr) { return false; } @@ -208,10 +211,10 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { return false; InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrecision); + auto inputDataType = 
DnnlExtensionUtils::IEPrecisionToDataType(inPrecision); InferenceEngine::Precision weiPrecision = getOriginalInputPrecisionAtPort(1); - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(weiPrecision); + auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(weiPrecision); if (isDW && (inputDataType == dnnl_s8 || dilation.size() == 3)) return false; @@ -219,14 +222,14 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8; } -bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const { +bool Deconvolution::canFuse(const NodePtr& node) const { if (canBeExecutedInInt8()) return canFuseSimpleOperation(node); return (fusedWith.empty() && node->canBePerformedAsScaleShift(this)); } -std::pair MKLDNNDeconvolutionNode::makeDummyInOutShape() { +std::pair Deconvolution::makeDummyInOutShape() { auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0)); auto outShape = getOutputShapeAtPort(0); @@ -252,7 +255,7 @@ std::pair MKLDNNDeconvolutionNode::makeDummyInOutShape() const auto& origInDims = getInputShapeAtPort(0).getDims(); const auto& weightDims = getWeightDims(); - const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; + const size_t wghOffset = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 1 : 0; for (size_t i = 0; i < inputDims.size() - 2; i++) { if (origInDims[2 + i] == Shape::UNDEFINED_DIM) { inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) * @@ -269,7 +272,7 @@ std::pair MKLDNNDeconvolutionNode::makeDummyInOutShape() return {inShape.getStaticDims(), outShape.getStaticDims()}; } -void MKLDNNDeconvolutionNode::getSupportedDescriptors() { +void Deconvolution::getSupportedDescriptors() { isInt8 = canBeExecutedInInt8(); InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); @@ -286,12 +289,12 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { if (!one_of(outPrecision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) outPrecision = InferenceEngine::Precision::FP32; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outPrecision); + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(inPrecision); + auto outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(outPrecision); if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) inputDataType = outputDataType = memory::data_type::bf16; if (!fusedWith.empty()) { - outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); + outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); } if (getParentEdges().size() != 2 && getParentEdges().size() != 3) @@ -324,9 +327,9 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { setPostOps(*attr, outShape.getStaticDims()); } -void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) { +void Deconvolution::initPaddingR(const Shape &inShape, const Shape &outShape) { for (int i = 0; i < paddingR.size(); i++) { - int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; + int with_group = getAlgorithm() == Algorithm::DeconvolutionGrouped ? 
1 : 0; const auto& weightDims = getWeightDims(); int krn = weightDims[with_group + 2 + i]; int src = outShape.getStaticDims()[2 + i]; @@ -338,7 +341,7 @@ void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &ou } } -void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) { +void Deconvolution::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) { mkldnn::post_ops ops; auto getBinPostOpShape = [&](){ @@ -350,13 +353,13 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vec }; for (auto &node : fusedWith) { - if (auto* eltwiseNode = dynamic_cast(node.get())) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { // TODO [DS]: change to shape from memory // use legacy depthwise since backprop convolution does not support binary post ops eltwiseNode->appendPostOps(ops, dims, postOpsArgs); continue; } - if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs); continue; } @@ -366,12 +369,12 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const Vec attr.set_post_ops(ops); } -void MKLDNNDeconvolutionNode::filterSupportedPrimitiveDescriptors() { - MKLDNNNode::filterSupportedPrimitiveDescriptors(); +void Deconvolution::filterSupportedPrimitiveDescriptors() { + Node::filterSupportedPrimitiveDescriptors(); filterSupportedDescriptors(); } -void MKLDNNDeconvolutionNode::filterSupportedDescriptors() { +void Deconvolution::filterSupportedDescriptors() { if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) { if (inputMemoryFormatsFilter.size() > 1 || outputMemoryFormatsFilter.size() > 1) { IE_THROW() << "Incorrect number of input or output memory formats for Deconvolution node"; @@ -381,19 +384,19 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { if (isInt8) { - auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); + auto src_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } else { - auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_src_desc); + auto src_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_src_desc); isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } } if (!outputMemoryFormatsFilter.empty()) { if (isInt8) { - auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); + auto dst_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } else { - auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_dst_desc); + auto dst_tdesc = DnnlExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_dst_desc); isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } } @@ -406,11 +409,11 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() { } } -bool MKLDNNDeconvolutionNode::created() const { - return getType() == Deconvolution; +bool Deconvolution::created() const { + return getType() == Type::Deconvolution; } -bool MKLDNNDeconvolutionNode::needShapeInfer() const { +bool Deconvolution::needShapeInfer() const { if (inputShapesModified()) { 
return true; } @@ -423,7 +426,7 @@ bool MKLDNNDeconvolutionNode::needShapeInfer() const { return false; } -std::vector MKLDNNDeconvolutionNode::shapeInfer() const { +std::vector Deconvolution::shapeInfer() const { const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); std::vector outSpDims; if (externOutShape) { @@ -432,8 +435,8 @@ std::vector MKLDNNDeconvolutionNode::shapeInfer() const { return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)}; } -VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector outSpDims) const { - std::vector inputShapes = { +VectorDims Deconvolution::shapeInferInternal(const VectorDims &inDims, std::vector outSpDims) const { + std::vector inputShapes = { inDims, getWeightDims() }; @@ -451,22 +454,22 @@ VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, outSpDims.data())}); } - std::vector outputShapes = shapeInference->infer(inputShapes, inputValues); + std::vector outputShapes = shapeInference->infer(inputShapes, inputValues); return outputShapes.back().to_shape(); } -void MKLDNNDeconvolutionNode::setDynamicBatchLim(int lim) { +void Deconvolution::setDynamicBatchLim(int lim) { if (!execPtr) { IE_THROW() << "Can't set dynamic batch for Deconvolution node with name: " << getName() << ", because executor is not compiled"; } if (execPtr->needReordering()) { IE_THROW() << "Can't execute Deconvolution node with dynamic batch via executor with reorders"; } - MKLDNNNode::setDynamicBatchLim(lim); + Node::setDynamicBatchLim(lim); } -void MKLDNNDeconvolutionNode::cleanup() { +void Deconvolution::cleanup() { if (!isDynamicNode()) { internalBlobs.clear(); } @@ -480,7 +483,7 @@ void MKLDNNDeconvolutionNode::cleanup() { } } -void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) { +void Deconvolution::execute(mkldnn::stream strm) { if (!execPtr) { IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled"; } @@ -491,10 +494,10 @@ void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) { } } -std::shared_ptr MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, - const mkldnn::memory::desc& wghDesc, - const mkldnn::memory::desc& dstDesc, - bool isWinograd) const { +std::shared_ptr Deconvolution::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc, + bool isWinograd) const { mkldnn::algorithm alg = isWinograd ? 
mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct; std::shared_ptr deconv_desc; std::shared_ptr fwd_conv_pd; @@ -502,21 +505,21 @@ std::shared_ptr MKLDNNDeconvolutionNode::createDefaultMkldnnDe if (fwd_conv_pd->get(true) == nullptr) { IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName(); } - return std::make_shared(deconv_desc, fwd_conv_pd); + return std::make_shared(deconv_desc, fwd_conv_pd); } -std::shared_ptr MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, - const mkldnn::memory::desc& wghDesc, - const mkldnn::memory::desc& dstDesc) const { - return std::make_shared(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc)); +std::shared_ptr Deconvolution::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc) const { + return std::make_shared(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc)); } -void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr desc, - MKLDNNMemoryPtr srcMemPtr, - MKLDNNMemoryPtr wghMemPtr, - MKLDNNMemoryPtr dstMemPtr, - AttrPtr attr, - impl_desc_type selectedImpl) { +void Deconvolution::createDeconvPrim(std::shared_ptr desc, + MemoryPtr srcMemPtr, + MemoryPtr wghMemPtr, + MemoryPtr dstMemPtr, + AttrPtr attr, + impl_desc_type selectedImpl) { auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr); while (static_cast(itpd)) { @@ -545,17 +548,17 @@ void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr } if (!itpd.next_impl()) { - auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()), + auto inDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()), memory::data_type::f32, memory::format_tag::any); - auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()), + auto wghDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()), memory::data_type::f32, memory::format_tag::any); - auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()), + auto outDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()), memory::data_type::f32, memory::format_tag::any); - std::shared_ptr anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false); + std::shared_ptr anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false); auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr); if (static_cast(anyDeconvItpd)) { auto prim_desc = convolution_backward_data::primitive_desc(anyDeconvItpd.get()); @@ -571,7 +574,7 @@ void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } -MKLDNNNode::AttrPtr MKLDNNDeconvolutionNode::makePrimitiveAttr(const VectorDims &dims) { +Node::AttrPtr Deconvolution::makePrimitiveAttr(const VectorDims &dims) { auto attr = std::make_shared(mkldnn::primitive_attr()); setPostOps(*attr, dims); @@ -579,11 +582,11 @@ MKLDNNNode::AttrPtr MKLDNNDeconvolutionNode::makePrimitiveAttr(const VectorDims return attr; } -MKLDNNNode::AttrPtr MKLDNNDeconvolutionNode::initPrimitiveAttr() { +Node::AttrPtr Deconvolution::initPrimitiveAttr() { return attr; } -void MKLDNNDeconvolutionNode::prepareParams() { +void 
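
Throughout the deconvolution changes, `MKLDNNExtensionUtils` becomes `DnnlExtensionUtils` while the call sites stay the same. The fallback branch of `createDeconvPrim` above is a typical use: when no primitive descriptor matches the selected implementation, plain f32 descriptors with an `any` layout placeholder are rebuilt from the static dims and oneDNN is left to choose the format. A condensed sketch of that idiom (the helper lambda and variable names are illustrative, not the exact patch text):

    using namespace mkldnn;

    // f32 data, format_tag::any lets oneDNN pick the layout.
    auto makeAnyDesc = [](const MemoryPtr& mem) {
        return memory::desc(DnnlExtensionUtils::convertToDnnlDims(mem->getStaticDims()),
                            memory::data_type::f32,
                            memory::format_tag::any);
    };

    auto inDesc  = makeAnyDesc(srcMemPtr);
    auto wghDesc = makeAnyDesc(wghMemPtr);
    auto outDesc = makeAnyDesc(dstMemPtr);
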
Deconvolution::prepareParams() { auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr(); auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); @@ -621,7 +624,7 @@ void MKLDNNDeconvolutionNode::prepareParams() { mkldnn::memory::desc wgh_candidate; if (isInt8) { if (internalBlobMemory.empty()) { - wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); + wgh_candidate = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); } else { wgh_candidate = internalBlobMemory.front()->GetDescWithType()->getDnnlDesc(); } @@ -629,12 +632,12 @@ void MKLDNNDeconvolutionNode::prepareParams() { wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType()->getDnnlDesc(); } - std::shared_ptr desc; + std::shared_ptr desc; if (isInt8) { desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate); } else { desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate, - selected_pd->getImplementationType() == ov::intel_cpu::impl_desc_type::jit_avx512_winograd); + selected_pd->getImplementationType() == impl_desc_type::jit_avx512_winograd); } createDeconvPrim(desc, srcMemPtr, wghMemPtr, dstMemPtr, pAttrLocal, selected_pd->getImplementationType()); @@ -648,10 +651,10 @@ void MKLDNNDeconvolutionNode::prepareParams() { {DNNL_ARG_WEIGHTS, wghMemPtr->GetPrimitive()}, {DNNL_ARG_DIFF_SRC, dstMemPtr->GetPrimitive()}}; } - MKLDNNNode::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); + Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); } -void MKLDNNDeconvolutionNode::createPrimitive() { +void Deconvolution::createPrimitive() { if (inputShapesDefined()) { if (needPrepareParams()) prepareParams(); @@ -659,7 +662,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() { } } -MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate, +Deconvolution::DefaultDeconvDescs Deconvolution::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate, const mkldnn::memory::desc& wgh_candidate, const mkldnn::memory::desc& out_candidate, mkldnn::algorithm alg) const { @@ -688,14 +691,14 @@ MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescr return {deconv_desc, fwd_conv_pd}; } -MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, +Deconvolution::Int8DeconvDesc Deconvolution::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, const mkldnn::memory::desc& wgh_candidate, const mkldnn::memory::desc& out_candidate) const { auto convertDims = [] (const std::vector& orig_dims) { return memory::dims(orig_dims.begin(), orig_dims.end()); }; - MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc; + Deconvolution::Int8DeconvDesc deconv_desc; deconv_desc = std::make_shared(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, in_candidate, wgh_candidate, out_candidate, convertDims(stride), convertDims(dilation), @@ -703,7 +706,7 @@ MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescripto return deconv_desc; } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, +void Deconvolution::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { auto inDesc 
= inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims()); auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc); @@ -722,10 +725,10 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector return; if (isInt8) { - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate)); } else { - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()), + mkldnn::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(getWeightDims()), dnnlInDesc.getDataType(), memory::format_tag::any); for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) { std::shared_ptr deconv_desc; @@ -738,7 +741,7 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector } } -std::shared_ptr MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr Deconvolution::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { return std::make_shared(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims())); } else if (idx > 0 && isInt8) { @@ -749,34 +752,34 @@ std::shared_ptr MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primi auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx); if (getInputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(desc); + return DnnlExtensionUtils::makeDescriptor(desc); } -std::shared_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr Deconvolution::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = isInt8 ? 
primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx); if (getOutputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(desc); + return DnnlExtensionUtils::makeDescriptor(desc); } -InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { +InferenceEngine::Precision Deconvolution::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account size_t inputsNumLimit = 2; for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) { auto parentEdge = getParentEdgeAt(i); - if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) { - inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); } } return getMaxPrecision(inputPrecisions); } -MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd, +Deconvolution::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd, const mkldnn::memory::desc& inMemDesc, const mkldnn::memory::desc& weightMemDesc, const mkldnn::memory::desc& outMemDesc, @@ -796,7 +799,7 @@ MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkld } } -MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd, +Deconvolution::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd, const mkldnn::memory::desc& inMemDesc, const mkldnn::memory::desc& weightMemDesc, const mkldnn::memory::desc& outMemDesc, @@ -816,7 +819,7 @@ MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::de } } -std::vector MKLDNNDeconvolutionNode::readOutputSpatialDims() const { +std::vector Deconvolution::readOutputSpatialDims() const { if (getParentEdges().size() < 3) { IE_THROW() << "Can't get output spatial dims. 
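
The `getSrcMemDesc`/`getDstMemDesc` hunks above repeat one small decision: if the port shape is dynamic, the oneDNN descriptor is wrapped into an undefined-shape CPU descriptor; otherwise it is converted directly. A stripped-down sketch of that branch follows, with the surrounding index handling omitted and the `MemoryDesc` template argument assumed (angle-bracket contents are not visible in the hunks as shown):

    std::shared_ptr<MemoryDesc> Deconvolution::getDstMemDesc(mkldnn::primitive_desc_iterator& primitive_desc_it,
                                                             size_t idx) {
        auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx);
        if (getOutputShapeAtPort(idx).isDynamic()) {
            // Keep the layout hint but leave dims undefined until real shapes arrive.
            return DnnlExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx));
        }
        return DnnlExtensionUtils::makeDescriptor(desc);
    }
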
Inputs number = " << getParentEdges().size(); } @@ -833,4 +836,6 @@ std::vector MKLDNNDeconvolutionNode::readOutputSpatialDims() const { return outSpDims; } -REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/deconv.h b/src/plugins/intel_cpu/src/nodes/deconv.h index 151a737fcb7..6f4163f1094 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.h +++ b/src/plugins/intel_cpu/src/nodes/deconv.h @@ -13,14 +13,15 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNDeconvolutionNode : public MKLDNNNode { +class Deconvolution : public Node { using DefaultDeconvDescs = std::pair, std::shared_ptr>; using Int8DeconvDesc = std::shared_ptr; public: - MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Deconvolution(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, @@ -33,7 +34,7 @@ public: return false; } - size_t descInputNumbers(MKLDNNDescriptor desc) override { + size_t descInputNumbers(DnnlDesriptor desc) override { return static_cast(getParentEdges().size()); } @@ -43,7 +44,7 @@ public: InferenceEngine::Precision getRuntimePrecision() const override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); } const std::vector& getStride() const { return stride; } @@ -121,18 +122,18 @@ private: Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, const mkldnn::memory::desc& wgh_candidate, const mkldnn::memory::desc& out_candidate) const; - std::shared_ptr createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, - const mkldnn::memory::desc& wghDesc, - const mkldnn::memory::desc& dstDesc, - bool isWinograd) const; - std::shared_ptr createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + std::shared_ptr createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, const mkldnn::memory::desc& wghDesc, - const mkldnn::memory::desc& dstDesc) const; + const mkldnn::memory::desc& dstDesc, + bool isWinograd) const; + std::shared_ptr createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc) const; - void createDeconvPrim(std::shared_ptr desc, - MKLDNNMemoryPtr srcMemPtr, - MKLDNNMemoryPtr wghMemPtr, - MKLDNNMemoryPtr dstMemPtr, + void createDeconvPrim(std::shared_ptr desc, + MemoryPtr srcMemPtr, + MemoryPtr wghMemPtr, + MemoryPtr dstMemPtr, AttrPtr attr, impl_desc_type selectedImpl); @@ -142,5 +143,6 @@ private: InferenceEngine::Blob::Ptr createWeiBlobAsIO(InferenceEngine::SizeVector dims); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index 87281c49beb..8d0c8dee0b7 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -7,13 +7,12 @@ #include #include #include -#include +#include #include #include "ie_parallel.hpp" #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; -using namespace ov::intel_cpu; using 
namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; @@ -21,13 +20,17 @@ using namespace mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; using namespace Xbyak; +namespace ov { +namespace intel_cpu { +namespace node { + #define GET_OFF(field) offsetof(jit_def_conv_call_args, field) template struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_def_conv_kernel_f32) - constexpr static int sampledPointsPerPixel = MKLDNNDeformableConvolutionNode::sampledPointsPerPixel; + constexpr static int sampledPointsPerPixel = DeformableConvolution::sampledPointsPerPixel; explicit jit_uni_def_conv_kernel_f32(const jit_def_conv_params& jcp) : jit_uni_def_conv_kernel(jcp), jit_generator() {} @@ -665,7 +668,7 @@ private: } }; -bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool DeformableConvolution::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ngraph::op::v1::DeformableConvolution::get_type_info_static(), @@ -679,8 +682,8 @@ bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr return true; } -MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +DeformableConvolution::DeformableConvolution(const std::shared_ptr& op, + const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -716,7 +719,7 @@ MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shar } } -void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { +void DeformableConvolution::getSupportedDescriptors() { if (getParentEdges().size() != 3 && getParentEdges().size() != 4) IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) @@ -735,7 +738,7 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { } } -void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { +void DeformableConvolution::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -820,7 +823,7 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNDeformableConvolutionNode::DefConvExecutor::prepareSamplingWeights( +void DeformableConvolution::DefConvExecutor::prepareSamplingWeights( const float* offsets, const float* modulation, bool enforceRef) { const int MB = jcp.mb; const int OH = jcp.oh; @@ -943,7 +946,7 @@ void MKLDNNDeformableConvolutionNode::DefConvExecutor::prepareSamplingWeights( }); } -MKLDNNDeformableConvolutionNode::DefConvExecutor::DefConvExecutor(const DefConvAttr &defConvAttr, +DeformableConvolution::DefConvExecutor::DefConvExecutor(const DefConvAttr &defConvAttr, const std::vector> &descVector) { if (descVector.size() != 4 && descVector.size() != 5) { IE_THROW() << "Deformable Convolution executor got incorrect desc's count (" << descVector.size() << ")"; @@ -1021,7 +1024,7 @@ MKLDNNDeformableConvolutionNode::DefConvExecutor::DefConvExecutor(const DefConvA jcp.nthr = dnnl_get_max_threads(); } -MKLDNNDeformableConvolutionNode::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr &defConvAttr, 
+DeformableConvolution::DefConvJitExecutor::DefConvJitExecutor(const DefConvAttr &defConvAttr, const std::vector> &descVector) : DefConvExecutor(defConvAttr, descVector) { if (mayiuse(cpu::x64::avx512_common)) { @@ -1040,7 +1043,7 @@ MKLDNNDeformableConvolutionNode::DefConvJitExecutor::DefConvJitExecutor(const De } } -void MKLDNNDeformableConvolutionNode::DefConvRefExecutor::exec(const float* src, const float* offsets, +void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const float* offsets, const float* weights, const float* modulation, float* dst, int *pSampledCoordsVector, float *pInterpWeightsVector) { this->pSampledCoordsVector = pSampledCoordsVector; @@ -1099,7 +1102,7 @@ void MKLDNNDeformableConvolutionNode::DefConvRefExecutor::exec(const float* src, }); } -void MKLDNNDeformableConvolutionNode::prepareParams() { +void DeformableConvolution::prepareParams() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); auto& offMemPtr = getParentEdgeAt(OFF_ID)->getMemoryPtr(); @@ -1160,11 +1163,11 @@ void MKLDNNDeformableConvolutionNode::prepareParams() { } } -void MKLDNNDeformableConvolutionNode::executeDynamicImpl(dnnl::stream strm) { +void DeformableConvolution::executeDynamicImpl(dnnl::stream strm) { execute(strm); } -void MKLDNNDeformableConvolutionNode::DefConvJitExecutor::exec(const float* src, const float* offsets, +void DeformableConvolution::DefConvJitExecutor::exec(const float* src, const float* offsets, const float* weights, const float* modulation, float* dst, int *pSampledCoordsVector, float *pInterpWeightsVector) { this->pSampledCoordsVector = pSampledCoordsVector; @@ -1196,7 +1199,7 @@ void MKLDNNDeformableConvolutionNode::DefConvJitExecutor::exec(const float* src, }); } -void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { +void DeformableConvolution::execute(mkldnn::stream strm) { const size_t inputsNumber = getOriginalInputsNumber(); auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); @@ -1226,18 +1229,20 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { } } -void MKLDNNDeformableConvolutionNode::updatePadding() { +void DeformableConvolution::updatePadding() { if (isDynamicNode() && autoPadding) { defConvAttr.padL = shapeInference->get_pads_begin(); } } -bool MKLDNNDeformableConvolutionNode::created() const { - return getType() == DeformableConvolution; +bool DeformableConvolution::created() const { + return getType() == Type::DeformableConvolution; } -InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision() const { +InferenceEngine::Precision DeformableConvolution::getRuntimePrecision() const { return getMaxPrecision(getInputPrecisions()); } -REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.h b/src/plugins/intel_cpu/src/nodes/def_conv.h index 2328c1aa4b4..415ed4eaa48 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.h +++ b/src/plugins/intel_cpu/src/nodes/def_conv.h @@ -11,6 +11,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_def_conv_params { int ndims; @@ -68,9 +69,9 @@ struct jit_uni_def_conv_kernel { jit_def_conv_params jcp_; }; -class MKLDNNDeformableConvolutionNode : public MKLDNNNode { +class DeformableConvolution : public Node { public: - MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr 
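
The `DefConvJitExecutor` constructor above selects a JIT kernel based on the best available ISA; only the `avx512_common` branch is visible in the hunk. Schematically the dispatch looks like the sketch below, where the `def_conv_kernel` member name, the lower-ISA fallbacks, and the error wording are assumptions rather than patch text:

    if (mayiuse(cpu::x64::avx512_common)) {
        def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx512_common>(jcp));
    } else if (mayiuse(cpu::x64::avx2)) {
        def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::avx2>(jcp));
    } else if (mayiuse(cpu::x64::sse41)) {
        def_conv_kernel.reset(new jit_uni_def_conv_kernel_f32<cpu::x64::sse41>(jcp));
    } else {
        IE_THROW() << "Can't create jit executor for DeformableConvolution: unsupported ISA";
    }
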
&cache); + DeformableConvolution(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -155,5 +156,6 @@ private: bool autoPadding = false; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index 571d597ac9e..15f82271e64 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -4,7 +4,7 @@ #include "depth_to_space.h" -#include +#include #include #include @@ -17,11 +17,14 @@ #define THROW_ERROR IE_THROW() << "DepthToSpace layer with name '" << getName() << "' " -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; -size_t MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::hash() const { +namespace ov { +namespace intel_cpu { +namespace node { + +size_t DepthToSpace::DepthToSpaceAttrs::hash() const { using namespace dnnl::impl; using namespace dnnl::impl::primitive_hashing; @@ -37,7 +40,7 @@ size_t MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::hash() const { return seed; } -bool MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::operator==(const DepthToSpaceAttrs& rhs) const { +bool DepthToSpace::DepthToSpaceAttrs::operator==(const DepthToSpaceAttrs& rhs) const { bool result = layoutType == rhs.layoutType && mode == rhs.mode && blockSize == rhs.blockSize && blockStep == rhs.blockStep && dataSize == rhs.dataSize && nSpatialDims == rhs.nSpatialDims && @@ -46,7 +49,7 @@ bool MKLDNNDepthToSpaceNode::DepthToSpaceAttrs::operator==(const DepthToSpaceAtt return result; } -bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool DepthToSpace::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto depthToSpace = ov::as_type_ptr(op); if (!depthToSpace) { @@ -64,8 +67,8 @@ bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +DepthToSpace::DepthToSpace(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -104,9 +107,9 @@ MKLDNNDepthToSpaceNode::MKLDNNDepthToSpaceNode(const std::shared_ptr(std::pow(attrs.blockSize, nSpatialDims)); } -void MKLDNNDepthToSpaceNode::getSupportedDescriptors() {} +void DepthToSpace::getSupportedDescriptors() {} -void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { +void DepthToSpace::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -158,7 +161,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNDepthToSpaceNode::createPrimitive() { +void DepthToSpace::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -182,7 +185,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { } } -void MKLDNNDepthToSpaceNode::prepareParams() { +void DepthToSpace::prepareParams() { attrs.srcBlockedDims = getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(); auto builder = [](const DepthToSpaceAttrs& 
key) -> std::shared_ptr { return std::make_shared(key); @@ -197,11 +200,11 @@ void MKLDNNDepthToSpaceNode::prepareParams() { execPtr = result.first; } -MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) { - if (!ov::intel_cpu::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) +DepthToSpace::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs) { + if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) IE_THROW() << "DepthToSpace executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts."; - const bool isBlocked = ov::intel_cpu::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); + const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc; const size_t nDims = attrs.srcBlockedDims.size(); const size_t reshapedRank = nDims + attrs.nSpatialDims + static_cast(isBlocked && attrs.mode == Mode::DEPTH_FIRST); @@ -286,7 +289,7 @@ MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthTo permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -void MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) { +void DepthToSpace::DepthToSpaceExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) { if (!permuteKernel) IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled."; @@ -296,7 +299,7 @@ void MKLDNNDepthToSpaceNode::DepthToSpaceExecutor::exec(MKLDNNMemoryPtr& srcMemP permuteKernel->execute(srcData, dstData, MB); } -void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) { +void DepthToSpace::execute(mkldnn::stream strm) { if (!execPtr) { THROW_ERROR << "doesn't have a compiled executor."; } @@ -305,11 +308,14 @@ void MKLDNNDepthToSpaceNode::execute(mkldnn::stream strm) { execPtr->exec(getParentEdgeAt(0)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr(), MB); } -void MKLDNNDepthToSpaceNode::executeDynamicImpl(mkldnn::stream strm) { +void DepthToSpace::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNDepthToSpaceNode::created() const { - return getType() == DepthToSpace; +bool DepthToSpace::created() const { + return getType() == Type::DepthToSpace; } -REG_MKLDNN_PRIM_FOR(MKLDNNDepthToSpaceNode, DepthToSpace); + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.h b/src/plugins/intel_cpu/src/nodes/depth_to_space.h index d90d3ee6ad1..c14c370961e 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.h @@ -11,10 +11,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNDepthToSpaceNode : public MKLDNNNode { +class DepthToSpace : public Node { public: - MKLDNNDepthToSpaceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + DepthToSpace(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -45,7 +46,7 @@ private: DepthToSpaceAttrs attrs; struct DepthToSpaceExecutor { DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs); - void exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const 
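
The `DepthToSpaceAttrs::hash()` / `operator==` pair above exists so that prepared executors can be cached and looked up by attribute value; the body of `hash()` is elided in the hunk. A plausible shape for it, using oneDNN's `hash_combine`/`get_vector_hash` helpers as the visible `using` directives suggest (the exact set of fields folded in is an assumption, mirrored from the equality operator):

    size_t DepthToSpace::DepthToSpaceAttrs::hash() const {
        using namespace dnnl::impl;
        using namespace dnnl::impl::primitive_hashing;

        size_t seed = 0;
        seed = hash_combine(seed, layoutType);
        seed = hash_combine(seed, mode);
        seed = hash_combine(seed, blockSize);
        seed = hash_combine(seed, blockStep);
        seed = hash_combine(seed, dataSize);
        seed = hash_combine(seed, nSpatialDims);
        seed = get_vector_hash(seed, srcBlockedDims);
        return seed;
    }
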
int MB); + void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB); ~DepthToSpaceExecutor() = default; private: @@ -55,5 +56,6 @@ private: executorPtr execPtr = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.cpp b/src/plugins/intel_cpu/src/nodes/detection_output.cpp index 83ed3072775..b088a0fca3d 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/detection_output.cpp @@ -12,9 +12,11 @@ #include "detection_output.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { template @@ -31,7 +33,7 @@ bool SortScorePairDescend>(const std::pair& op, std::string& errorMessage) noexcept { +bool DetectionOutput::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto doOp = ov::as_type_ptr(op); if (!doOp) { @@ -49,8 +51,8 @@ bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +DetectionOutput::DetectionOutput(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -90,7 +92,7 @@ MKLDNNDetectionOutputNode::MKLDNNDetectionOutputNode(const std::shared_ptrgetMemory().GetShape().getStaticDims(); const auto &idConfDims = getParentEdgeAt(ID_CONF)->getMemory().GetShape().getStaticDims(); priorsNum = static_cast(idPriorDims.back() / priorSize); @@ -136,7 +138,7 @@ void MKLDNNDetectionOutputNode::prepareParams() { numPriorsActual.resize(imgNum); } -void MKLDNNDetectionOutputNode::initSupportedPrimitiveDescriptors() { +void DetectionOutput::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -162,11 +164,11 @@ struct ConfidenceComparatorDO { const float* confData; }; -void MKLDNNDetectionOutputNode::executeDynamicImpl(mkldnn::stream strm) { +void DetectionOutput::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { +void DetectionOutput::execute(mkldnn::stream strm) { float *dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); const float *locData = reinterpret_cast(getParentEdgeAt(ID_LOC)->getMemoryPtr()->GetPtr()); @@ -341,7 +343,7 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { generateOutput(reorderedConfData, indicesData, detectionsData, decodedBboxesData, dstData); } -inline void MKLDNNDetectionOutputNode::getActualPriorNum(const float *priorData, int* numPriorsActual, int n) { +inline void DetectionOutput::getActualPriorNum(const float *priorData, int* numPriorsActual, int n) { numPriorsActual[n] = priorsNum; if (!normalized) { int num = 0; @@ -355,7 +357,7 @@ inline void MKLDNNDetectionOutputNode::getActualPriorNum(const float *priorData, } } -inline void MKLDNNDetectionOutputNode::confReorderDense(const float *confData, const float *ARMConfData, float *reorderedConfData) { +inline void DetectionOutput::confReorderDense(const float *confData, const float *ARMConfData, float *reorderedConfData) { if (withAddBoxPred) { parallel_for2d(imgNum, priorsNum, [&](size_t n, size_t p) { if (ARMConfData[n * priorsNum * 2 + p * 2 + 1] < objScore) { @@ -380,7 +382,7 @@ 
inline void MKLDNNDetectionOutputNode::confReorderDense(const float *confData, c }); } -inline void MKLDNNDetectionOutputNode::confFilterCF(float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) { +inline void DetectionOutput::confFilterCF(float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) { parallel_for2d(imgNum, classesNum, [&](size_t n, size_t c) { // in: reorderedConf // out: pindices count @@ -409,7 +411,7 @@ inline void MKLDNNDetectionOutputNode::confFilterCF(float* reorderedConfData, in // MX filter is per image filter, max output is prior num(select max for all class within this prior) // NMS is per class, keep topk is per image, final output is per class -inline void MKLDNNDetectionOutputNode::confFilterMX(const float* confData, const float* ARMConfData, float* reorderedConfData, +inline void DetectionOutput::confFilterMX(const float* confData, const float* ARMConfData, float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) { for (int n = 0; n < imgNum; ++n) { int offB = n * priorsNum * classesNum; @@ -471,7 +473,7 @@ inline void MKLDNNDetectionOutputNode::confFilterMX(const float* confData, const } } -inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityCF(const float* confData, const float* ARMConfData, float* reorderedConfData, +inline void DetectionOutput::confReorderAndFilterSparsityCF(const float* confData, const float* ARMConfData, float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) { int* reorderedConfDataIndices = reinterpret_cast(reorderedConfData); for (int n = 0; n < imgNum; ++n) { @@ -554,7 +556,7 @@ inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityCF(const floa } } -inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityMX(const float* confData, const float* ARMConfData, float* reorderedConfData, +inline void DetectionOutput::confReorderAndFilterSparsityMX(const float* confData, const float* ARMConfData, float* reorderedConfData, int* indicesData, int* indicesBufData, int* detectionsData) { for (int n = 0; n < imgNum; ++n) { int off = n * priorsNum * classesNum; @@ -614,7 +616,7 @@ inline void MKLDNNDetectionOutputNode::confReorderAndFilterSparsityMX(const floa } } -inline void MKLDNNDetectionOutputNode::decodeBBoxes(const float *priorData, +inline void DetectionOutput::decodeBBoxes(const float *priorData, const float *locData, const float *varianceData, float *decodedBboxes, @@ -717,7 +719,7 @@ inline void MKLDNNDetectionOutputNode::decodeBBoxes(const float *priorData, }); } -inline void MKLDNNDetectionOutputNode::topk(const int *indicesIn, int *indicesOut, const float *conf, int n, int k) { +inline void DetectionOutput::topk(const int *indicesIn, int *indicesOut, const float *conf, int n, int k) { std::partial_sort_copy(indicesIn, indicesIn + n, indicesOut, indicesOut + k, ConfidenceComparatorDO(conf)); @@ -760,7 +762,7 @@ static inline float JaccardOverlap(const float *decodedBbox, return intersectSize / (bbox1Size + bbox2Size - intersectSize); } -inline void MKLDNNDetectionOutputNode::NMSCF(int* indicesIn, +inline void DetectionOutput::NMSCF(int* indicesIn, int& detections, int* indicesOut, const float* bboxes, @@ -787,7 +789,7 @@ inline void MKLDNNDetectionOutputNode::NMSCF(int* indicesIn, } } -inline void MKLDNNDetectionOutputNode::NMSMX(int* indicesIn, +inline void DetectionOutput::NMSMX(int* indicesIn, int* detections, int* indicesOut, const float* bboxes, @@ -826,7 +828,7 @@ inline void 
MKLDNNDetectionOutputNode::NMSMX(int* indicesIn, } } -inline void MKLDNNDetectionOutputNode::generateOutput(float* reorderedConfData, int* indicesData, int* detectionsData, float* decodedBboxesData, +inline void DetectionOutput::generateOutput(float* reorderedConfData, int* indicesData, int* detectionsData, float* decodedBboxesData, float* dstData) { const auto& outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); const int numResults = outDims[2]; @@ -895,8 +897,10 @@ inline void MKLDNNDetectionOutputNode::generateOutput(float* reorderedConfData, } } -bool MKLDNNDetectionOutputNode::created() const { - return getType() == DetectionOutput; +bool DetectionOutput::created() const { + return getType() == Type::DetectionOutput; } -REG_MKLDNN_PRIM_FOR(MKLDNNDetectionOutputNode, DetectionOutput) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.h b/src/plugins/intel_cpu/src/nodes/detection_output.h index d0da9a36dbb..73e04e0805a 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.h +++ b/src/plugins/intel_cpu/src/nodes/detection_output.h @@ -10,10 +10,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNDetectionOutputNode : public MKLDNNNode { +class DetectionOutput : public Node { public: - MKLDNNDetectionOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + DetectionOutput(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -109,5 +110,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/dft.cpp b/src/plugins/intel_cpu/src/nodes/dft.cpp index 263896c8439..4243a9247bd 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.cpp +++ b/src/plugins/intel_cpu/src/nodes/dft.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "dft.h" #include "ie_parallel.hpp" @@ -16,10 +16,13 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNDFTNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool DFT::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -38,8 +41,8 @@ bool MKLDNNDFTNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +DFT::DFT(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -74,9 +77,9 @@ MKLDNNDFTNode::MKLDNNDFTNode(const std::shared_ptr& op, const mkld inverse = std::dynamic_pointer_cast(op) == nullptr; } -void MKLDNNDFTNode::getSupportedDescriptors() {} +void DFT::getSupportedDescriptors() {} -void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() { +void DFT::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -226,7 +229,7 @@ void copyDataToOutputWithSignalSize(const float* input, const std::vector(axesEdge->getMemoryPtr()->GetPtr()); axes = std::vector(axesStartPtr, axesStartPtr + 
axesEdge->getMemory().getStaticDims()[0]); @@ -273,7 +276,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } } -void MKLDNNDFTNode::dftNd(float* output, const std::vector& outputStrides) const { +void DFT::dftNd(float* output, const std::vector& outputStrides) const { const std::vector iterationRange(outputShape.begin(), outputShape.end() - 1); const size_t lastDimIndex = iterationRange.size() - 1; for (size_t axisIndex = 0; axisIndex < axes.size(); ++axisIndex) { @@ -307,7 +310,7 @@ void MKLDNNDFTNode::dftNd(float* output, const std::vector& outputStride } /* Cooley Tukey implementation of FFT */ -void MKLDNNDFTNode::fft(float* data, int64_t dataLength, bool parallelize) const { +void DFT::fft(float* data, int64_t dataLength, bool parallelize) const { static int cacheSizeL3 = utils::get_cache_size(3, false); static int elementsPerCacheLine = cacheSizeL3 / sizeof(float); std::vector bufferVector(dataLength * 2, 0); @@ -368,7 +371,7 @@ void MKLDNNDFTNode::fft(float* data, int64_t dataLength, bool parallelize) const } } -void MKLDNNDFTNode::naiveDFT(float* data, size_t dataLength) const { +void DFT::naiveDFT(float* data, size_t dataLength) const { std::vector outputBuffer(dataLength); const size_t nComplex = dataLength / 2; const auto& twiddles = twiddlesMap.find(nComplex)->second; @@ -401,7 +404,7 @@ void MKLDNNDFTNode::naiveDFT(float* data, size_t dataLength) const { cpu_memcpy(data, outputBuffer.data(), dataLength * sizeof(float)); } -std::vector> MKLDNNDFTNode::generateTwiddles(size_t n_complex) const { +std::vector> DFT::generateTwiddles(size_t n_complex) const { std::vector> twiddles(n_complex * n_complex); parallel_for(n_complex, [&](const size_t k) { for (size_t n = 0; n < n_complex; ++n) { @@ -414,11 +417,12 @@ std::vector> MKLDNNDFTNode::generateTwiddles(size_t n_co return twiddles; } -bool MKLDNNDFTNode::created() const { - return getType() == DFT; +bool DFT::created() const { + return getType() == Type::DFT; } -void MKLDNNDFTNode::createPrimitive() {} +void DFT::createPrimitive() {} - -REG_MKLDNN_PRIM_FOR(MKLDNNDFTNode, DFT) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/dft.h b/src/plugins/intel_cpu/src/nodes/dft.h index f766c54d9cc..ea2e1a2a3df 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.h +++ b/src/plugins/intel_cpu/src/nodes/dft.h @@ -10,11 +10,12 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNDFTNode : public MKLDNNNode { +class DFT : public Node { public: - MKLDNNDFTNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNDFTNode() override = default; + DFT(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~DFT() override = default; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -43,5 +44,6 @@ private: bool inverse; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 4273d8234c9..c1628bcf93b 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include "fake_quantize.h" #include "pooling.h" #include "input.h" @@ -43,7 +43,6 @@ #include #include "memory_desc/dnnl_blocked_memory_desc.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl::utils; using namespace mkldnn::impl::cpu; 
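Reviewer note: the hunks in this file repeat the same mechanical pattern seen above for DetectionOutput and DFT — the MKLDNN* class prefix is dropped, the implementation moves into the ov::intel_cpu::node namespace, unscoped Type/Algorithm enum values become scoped (Type::Eltwise, Algorithm::EltwiseAdd), and mkldnnAlgorithm is renamed to onednnAlgorithm. A minimal sketch of what a ported node looks like under the new convention; this is illustrative only (MyNode and Type::MyOp are hypothetical, the ngraph::Node template argument is assumed from the plugin's usual constructor signature, and the plugin's Node/WeightsSharing headers are assumed to be included):

// Hypothetical example, not part of this patch: shape of a node after the rename.
namespace ov {
namespace intel_cpu {
namespace node {

class MyNode : public Node {                                  // was: class MKLDNNMyNode : public MKLDNNNode
public:
    MyNode(const std::shared_ptr<ngraph::Node>& op,
           const mkldnn::engine& eng,
           WeightsSharing::Ptr& cache)                        // was: MKLDNNWeightsSharing::Ptr
        : Node(op, eng, cache) {}                             // was: MKLDNNNode(op, eng, cache)

    bool created() const override {
        return getType() == Type::MyOp;                       // was: getType() == MyOp (unscoped enum value)
    }
};

}   // namespace node
}   // namespace intel_cpu
}   // namespace ov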
@@ -52,6 +51,9 @@ using namespace Xbyak; #define GET_OFF(field) offsetof(jit_eltwise_call_args_ptrs, field) +namespace ov { +namespace intel_cpu { +namespace node { namespace { template @@ -65,7 +67,7 @@ struct EltwiseEmitterContext { std::shared_ptr emitter; jit_generator *host; cpu_isa_t host_isa; - const MKLDNNEltwiseNode::EltwiseData& opData; + const Eltwise::EltwiseData& opData; InferenceEngine::Precision exec_prc; }; @@ -79,7 +81,7 @@ struct EltwiseEmitter { template<> struct EltwiseEmitter { void operator()(EltwiseEmitterContext & ctx) { - auto algKind = static_cast(ctx.opData.mkldnnAlgorithm); + auto algKind = static_cast(ctx.opData.onednnAlgorithm); ctx.emitter = std::make_shared(ctx.host, ctx.host_isa, algKind, ctx.opData.alpha, ctx.opData.beta, ctx.exec_prc); } @@ -96,11 +98,11 @@ struct EltwiseEmitter { } // namespace template -struct jit_uni_eltwise_generic : public ov::intel_cpu::jit_uni_eltwise_kernel, public jit_generator { +struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_eltwise_generic) explicit jit_uni_eltwise_generic(const jit_eltwise_params& jep, - const std::vector& eltwise_data, + const std::vector& eltwise_data, const std::vector& ops_list, const mkldnn::post_ops& post_ops) : jit_uni_eltwise_kernel(jep), jit_generator(), eltwise_data_(eltwise_data), ops_list_(ops_list), post_ops_(post_ops) {} @@ -415,7 +417,7 @@ private: std::vector>> quantization_injectors = {}; - const std::vector& eltwise_data_; + const std::vector& eltwise_data_; const std::vector& ops_list_; const mkldnn::post_ops& post_ops_; @@ -423,46 +425,46 @@ private: std::set precisions; OV_SWITCH(intel_cpu, SupportedPrecisions, precisions, algo, - OV_CASE(EltwiseRelu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseGelu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseElu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseTanh, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSigmoid, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseAbs, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSqrt, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSoftRelu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseExp, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseClamp, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSwish, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseHswish, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseMish, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseHsigmoid, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseAdd, jit_add_emitter), - OV_CASE(EltwiseMulAdd, jit_mul_add_emitter), - OV_CASE(EltwiseSubtract, jit_subtract_emitter), - OV_CASE(EltwiseMultiply, jit_multiply_emitter), - OV_CASE(EltwiseDivide, jit_divide_emitter), - OV_CASE(EltwiseFloorMod, jit_floor_mod_emitter), - OV_CASE(EltwiseMod, jit_mod_emitter), - OV_CASE(EltwiseMaximum, jit_maximum_emitter), - OV_CASE(EltwiseMinimum, jit_minimum_emitter), - OV_CASE(EltwiseSquaredDifference, jit_squared_difference_emitter), - OV_CASE(EltwisePowerDynamic, jit_power_dynamic_emitter), - OV_CASE(EltwiseEqual, jit_equal_emitter), - OV_CASE(EltwiseNotEqual, jit_not_equal_emitter), - OV_CASE(EltwiseGreater, jit_greater_emitter), - OV_CASE(EltwiseGreaterEqual, jit_greater_equal_emitter), - OV_CASE(EltwiseLess, jit_less_emitter), - OV_CASE(EltwiseLessEqual, jit_less_equal_emitter), - OV_CASE(EltwiseLogicalAnd, jit_logical_and_emitter), - OV_CASE(EltwiseLogicalOr, jit_logical_or_emitter), - OV_CASE(EltwiseLogicalXor, jit_logical_xor_emitter), - 
OV_CASE(EltwiseLogicalNot, jit_logical_not_emitter), - OV_CASE(EltwisePowerStatic, jit_power_static_emitter), - OV_CASE(EltwisePrelu, jit_prelu_emitter), - OV_CASE(EltwiseErf, jit_erf_emitter)); + OV_CASE(Algorithm::EltwiseRelu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseGelu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseElu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseTanh, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSigmoid, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseAbs, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSqrt, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSoftRelu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseExp, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseClamp, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSwish, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseHswish, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseMish, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseHsigmoid, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), + OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), + OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), + OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), + OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), + OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), + OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), + OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter), + OV_CASE(Algorithm::EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(Algorithm::EltwiseLess, jit_less_emitter), + OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter), + OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter)); if (precisions.empty()) IE_THROW() << "Unsupported operation type for Eltwise emitter"; @@ -470,7 +472,7 @@ private: return precisions; } - std::shared_ptr create_eltwise_emitter(const MKLDNNEltwiseNode::EltwiseData& data, Precision exec_prec) { + std::shared_ptr create_eltwise_emitter(const Eltwise::EltwiseData& data, Precision exec_prec) { EltwiseEmitterContext ctx = { nullptr, this, @@ -480,46 +482,46 @@ private: }; OV_SWITCH(intel_cpu, EltwiseEmitter, ctx, data.algo, - OV_CASE(EltwiseRelu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseGelu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseElu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseTanh, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSigmoid, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseAbs, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSqrt, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSoftRelu, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseExp, 
jit_mkldnn_aux_emitter), - OV_CASE(EltwiseClamp, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseSwish, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseHswish, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseMish, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseHsigmoid, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), - OV_CASE(EltwiseAdd, jit_add_emitter), - OV_CASE(EltwiseMulAdd, jit_mul_add_emitter), - OV_CASE(EltwiseSubtract, jit_subtract_emitter), - OV_CASE(EltwiseMultiply, jit_multiply_emitter), - OV_CASE(EltwiseDivide, jit_divide_emitter), - OV_CASE(EltwiseFloorMod, jit_floor_mod_emitter), - OV_CASE(EltwiseMod, jit_mod_emitter), - OV_CASE(EltwiseMaximum, jit_maximum_emitter), - OV_CASE(EltwiseMinimum, jit_minimum_emitter), - OV_CASE(EltwiseSquaredDifference, jit_squared_difference_emitter), - OV_CASE(EltwisePowerDynamic, jit_power_dynamic_emitter), - OV_CASE(EltwiseEqual, jit_equal_emitter), - OV_CASE(EltwiseNotEqual, jit_not_equal_emitter), - OV_CASE(EltwiseGreater, jit_greater_emitter), - OV_CASE(EltwiseGreaterEqual, jit_greater_equal_emitter), - OV_CASE(EltwiseLess, jit_less_emitter), - OV_CASE(EltwiseLessEqual, jit_less_equal_emitter), - OV_CASE(EltwiseLogicalAnd, jit_logical_and_emitter), - OV_CASE(EltwiseLogicalOr, jit_logical_or_emitter), - OV_CASE(EltwiseLogicalXor, jit_logical_xor_emitter), - OV_CASE(EltwiseLogicalNot, jit_logical_not_emitter), - OV_CASE(EltwisePowerStatic, jit_power_static_emitter), - OV_CASE(EltwisePrelu, jit_prelu_emitter), - OV_CASE(EltwiseErf, jit_erf_emitter)); + OV_CASE(Algorithm::EltwiseRelu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseGelu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseElu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseTanh, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSigmoid, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseAbs, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSqrt, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSoftRelu, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseExp, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseClamp, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseSwish, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseHswish, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseMish, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseHsigmoid, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), + OV_CASE(Algorithm::EltwiseAdd, jit_add_emitter), + OV_CASE(Algorithm::EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(Algorithm::EltwiseSubtract, jit_subtract_emitter), + OV_CASE(Algorithm::EltwiseMultiply, jit_multiply_emitter), + OV_CASE(Algorithm::EltwiseDivide, jit_divide_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(Algorithm::EltwiseMod, jit_mod_emitter), + OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter), + OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter), + OV_CASE(Algorithm::EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(Algorithm::EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), + OV_CASE(Algorithm::EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(Algorithm::EltwiseGreater, jit_greater_emitter), + OV_CASE(Algorithm::EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(Algorithm::EltwiseLess, jit_less_emitter), 
+ OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(Algorithm::EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(Algorithm::EltwisePrelu, jit_prelu_emitter), + OV_CASE(Algorithm::EltwiseErf, jit_erf_emitter)); if (!ctx.emitter) IE_THROW() << "Unsupported operation type for Eltwise emitter"; @@ -546,7 +548,7 @@ private: int eltwise_post_op_idx = 0; int quantization_post_op_idx = 0; for (int i = 1; i < ops_list_.size(); i++) { - if (ops_list_[i] == Eltwise) { + if (ops_list_[i] == ov::intel_cpu::Type::Eltwise) { std::vector in_idxs; std::vector aux_idxs; in_idxs.push_back(vmm_dst.getIdx()); @@ -561,7 +563,7 @@ private: post_op_emitters[eltwise_post_op_idx]->emit_code(in_idxs, out_idxs, aux_idxs); eltwise_post_op_idx++; - } else if (ops_list_[i] == FakeQuantize) { + } else if (ops_list_[i] == ov::intel_cpu::Type::FakeQuantize) { auto& p = post_ops_.get()->entry_[quantization_post_op_idx]; bool do_dequantization = p.quantization.alg == dnnl::impl::alg_kind::quantization_quantize_dequantize; bool do_rounding = do_dequantization || jep_.dst_prc == Precision::FP32 || i != ops_list_.size() - 1; @@ -818,7 +820,7 @@ private: } }; -MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPolicy(const std::shared_ptr& op) { +Eltwise::BroadcastingPolicy Eltwise::determineBroadcastingPolicy(const std::shared_ptr& op) { const auto const1 = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(0)); const auto const2 = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1)); int constPort = -1; @@ -837,129 +839,129 @@ MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPo return PerChannel; } -const std::map MKLDNNEltwiseNode::initializers = { - {ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseAdd; +const std::map Eltwise::initializers = { + {ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseAdd; node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, - {ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseSubtract; + {ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseSubtract; node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, - {ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseMultiply; + {ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseMultiply; node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, - {ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseDivide; + {ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseDivide; node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, - {ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) 
{ - node.algorithm = EltwiseSquaredDifference; + {ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseSquaredDifference; }}, - {ngraph::op::v1::Maximum::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseMaximum; + {ngraph::op::v1::Maximum::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseMaximum; }}, - {ngraph::op::v1::Minimum::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseMinimum; + {ngraph::op::v1::Minimum::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseMinimum; }}, - {ngraph::op::v1::Mod::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseMod; + {ngraph::op::v1::Mod::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseMod; }}, - {ngraph::op::v1::FloorMod::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseFloorMod; + {ngraph::op::v1::FloorMod::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseFloorMod; }}, - {ngraph::op::v1::Power::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwisePowerDynamic; + {ngraph::op::v1::Power::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwisePowerDynamic; }}, - {PowerStaticNode::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {PowerStaticNode::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto powerStatic = getNgraphOpAs(op); - node.algorithm = EltwisePowerStatic; + node.algorithm = Algorithm::EltwisePowerStatic; node.alpha = powerStatic->get_power(); node.beta = powerStatic->get_scale(); node.gamma = powerStatic->get_shift(); node.broadcastingPolicy = PerTensor; }}, - {ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseEqual; + {ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseEqual; }}, - {ngraph::op::v1::NotEqual::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseNotEqual; + {ngraph::op::v1::NotEqual::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseNotEqual; }}, - {ngraph::op::v1::Greater::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseGreater; + {ngraph::op::v1::Greater::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseGreater; }}, - {ngraph::op::v1::GreaterEqual::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseGreaterEqual; + {ngraph::op::v1::GreaterEqual::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseGreaterEqual; }}, - {ngraph::op::v1::Less::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseLess; + {ngraph::op::v1::Less::get_type_info_static(), [](const std::shared_ptr& 
op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseLess; }}, - {ngraph::op::v1::LessEqual::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseLessEqual; + {ngraph::op::v1::LessEqual::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseLessEqual; }}, - {ngraph::op::v1::LogicalAnd::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseLogicalAnd; + {ngraph::op::v1::LogicalAnd::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseLogicalAnd; }}, - {ngraph::op::v1::LogicalOr::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseLogicalOr; + {ngraph::op::v1::LogicalOr::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseLogicalOr; }}, - {ngraph::op::v1::LogicalXor::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseLogicalXor; + {ngraph::op::v1::LogicalXor::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseLogicalXor; }}, - {ngraph::op::v1::LogicalNot::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseLogicalNot; + {ngraph::op::v1::LogicalNot::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseLogicalNot; }}, - {ngraph::op::v0::Relu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseRelu; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_relu; + {ngraph::op::v0::Relu::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseRelu; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_relu; }}, - {LeakyReluNode::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {LeakyReluNode::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto leakyRelu = getNgraphOpAs(op); - node.algorithm = EltwiseRelu; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_relu; + node.algorithm = Algorithm::EltwiseRelu; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_relu; node.alpha = leakyRelu->get_slope(); node.beta = 0.0f; }}, - {ngraph::op::v0::Gelu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseGelu; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_gelu_erf; + {ngraph::op::v0::Gelu::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseGelu; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_gelu_erf; }}, - {ngraph::op::v7::Gelu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {ngraph::op::v7::Gelu::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto gelu = getNgraphOpAs(op); - node.algorithm = EltwiseGelu; + node.algorithm = Algorithm::EltwiseGelu; ngraph::op::GeluApproximationMode approximationMode = gelu->get_approximation_mode(); if (approximationMode == ngraph::op::GeluApproximationMode::ERF) - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_gelu_erf; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_gelu_erf; else if (approximationMode == ngraph::op::GeluApproximationMode::TANH) - node.mkldnnAlgorithm = 
mkldnn::algorithm::eltwise_gelu_tanh; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_gelu_tanh; else IE_THROW(NotImplemented) << "CPU Eltwise node doesn't support ngraph operation Gelu with approximation mode: " << approximationMode; }}, - {ngraph::op::v0::Elu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {ngraph::op::v0::Elu::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto eluOp = getNgraphOpAs(op); node.alpha = static_cast(eluOp->get_alpha()); - node.algorithm = EltwiseElu; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_elu; + node.algorithm = Algorithm::EltwiseElu; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_elu; }}, - {ngraph::op::v0::Tanh::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseTanh; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_tanh; + {ngraph::op::v0::Tanh::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseTanh; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_tanh; }}, - {ngraph::op::v0::Sigmoid::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseSigmoid; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_logistic; + {ngraph::op::v0::Sigmoid::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseSigmoid; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_logistic; }}, - {ngraph::op::v0::Abs::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseAbs; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_abs; + {ngraph::op::v0::Abs::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseAbs; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_abs; }}, - {ngraph::op::v0::Sqrt::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseSqrt; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_sqrt; + {ngraph::op::v0::Sqrt::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseSqrt; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_sqrt; }}, - {ngraph::op::v0::Clamp::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {ngraph::op::v0::Clamp::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto clampOp = getNgraphOpAs(op); float alpha_ = static_cast(clampOp->get_min()); @@ -971,62 +973,62 @@ const std::map M } node.alpha = alpha_; node.beta = beta_; - node.algorithm = EltwiseClamp; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_clip; + node.algorithm = Algorithm::EltwiseClamp; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_clip; }}, - {ngraph::op::v0::Exp::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseExp; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_exp; + {ngraph::op::v0::Exp::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseExp; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_exp; }}, - {SwishNode::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {SwishNode::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto swishOp = getNgraphOpAs(op); - node.algorithm = EltwiseSwish; - node.mkldnnAlgorithm = 
mkldnn::algorithm::eltwise_swish; + node.algorithm = Algorithm::EltwiseSwish; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_swish; node.alpha = swishOp->get_alpha(); }}, - {ngraph::op::v4::HSwish::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseHswish; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_hardswish; + {ngraph::op::v4::HSwish::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseHswish; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_hardswish; }}, - {ngraph::op::v4::Mish::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseMish; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_mish; + {ngraph::op::v4::Mish::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseMish; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_mish; }}, - {ngraph::op::v5::HSigmoid::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseHsigmoid; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_hsigmoid; + {ngraph::op::v5::HSigmoid::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseHsigmoid; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_hsigmoid; }}, - {ngraph::op::v5::Round::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + {ngraph::op::v5::Round::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { auto roundOp = getNgraphOpAs(op); switch (roundOp->get_mode()) { case ngraph::op::v5::Round::RoundMode::HALF_TO_EVEN: - node.algorithm = EltwiseRoundHalfToEven; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_round_half_to_even; + node.algorithm = Algorithm::EltwiseRoundHalfToEven; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_round_half_to_even; break; case ngraph::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO: - node.algorithm = EltwiseRoundHalfAwayFromZero; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_round_half_away_from_zero; + node.algorithm = Algorithm::EltwiseRoundHalfAwayFromZero; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_round_half_away_from_zero; break; } }}, - {ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwisePrelu; + {ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwisePrelu; node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, - {ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseErf; + {ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseErf; }}, - {ngraph::op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { - node.algorithm = EltwiseSoftRelu; - node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_soft_relu; + {ngraph::op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr& op, Eltwise& node) { + node.algorithm = Algorithm::EltwiseSoftRelu; + node.onednnAlgorithm = mkldnn::algorithm::eltwise_soft_relu; }}, }; namespace { struct EltwiseKey { - std::vector eltwise_data; + std::vector eltwise_data; std::vector ops_list; VectorDims outBlkDims; VectorDims outOrder; @@ -1041,15 +1043,15 @@ struct 
EltwiseKey { using namespace dnnl::impl; using namespace dnnl::impl::primitive_hashing; size_t seed = 0; - auto hash_combine_eltwiseData = [](size_t seed, const MKLDNNEltwiseNode::EltwiseData& eltwiseData) { + auto hash_combine_eltwiseData = [](size_t seed, const Eltwise::EltwiseData& eltwiseData) { seed = hash_combine(seed, eltwiseData.algo); - seed = hash_combine(seed, eltwiseData.mkldnnAlgorithm); + seed = hash_combine(seed, eltwiseData.onednnAlgorithm); seed = hash_combine(seed, eltwiseData.alpha); seed = hash_combine(seed, eltwiseData.beta); seed = hash_combine(seed, eltwiseData.gamma); return seed; }; - std::for_each(eltwise_data.begin(), eltwise_data.end(), [&](const MKLDNNEltwiseNode::EltwiseData& item) { + std::for_each(eltwise_data.begin(), eltwise_data.end(), [&](const Eltwise::EltwiseData& item) { seed = hash_combine_eltwiseData(seed, item); }); seed = get_vector_hash(seed, ops_list); @@ -1090,7 +1092,7 @@ struct EltwiseKey { } }; -class EltwiseJitExecutor : public MKLDNNEltwiseNode::IEltwiseExecutor { +class EltwiseJitExecutor : public Eltwise::IEltwiseExecutor { public: static void offset_out_calc(VectorDims& offset, const VectorDims& dims) { int k = 1; @@ -1108,7 +1110,7 @@ public: } } - EltwiseJitExecutor(const std::vector& eltwise_data, + EltwiseJitExecutor(const std::vector& eltwise_data, const std::vector& ops_list, const VectorDims& outBlkDims, const VectorDims& outOrder, @@ -1191,7 +1193,7 @@ public: size_t oc_size = 0; jep.oc_offsets.resize(jep.input_size, 0); std::fill(jep.oc_offsets.begin(), jep.oc_offsets.end(), 0); - if (isFusedWith(FakeQuantize)) { + if (isFusedWith(Type::FakeQuantize)) { size_t offset_oc = 1; for (int i = outOrder.size() - 1; i >= 0; i--) { if (outOrder[i] == 1) { @@ -1259,7 +1261,7 @@ public: } collapseLastDims(jep.dims, 1); - if (isFusedWith(FakeQuantize)) { + if (isFusedWith(Type::FakeQuantize)) { collapseLastOffsets(jep.oc_offsets, 1); } } else { @@ -1374,9 +1376,9 @@ public: static const int optimalTensorRank = 6; }; -class EltwiseRefExecutor : public MKLDNNEltwiseNode::IEltwiseExecutor { +class EltwiseRefExecutor : public Eltwise::IEltwiseExecutor { public: - EltwiseRefExecutor(MKLDNNEltwiseNode::EltwiseData opData, + EltwiseRefExecutor(Eltwise::EltwiseData opData, const VectorDims& outBlkDims, std::vector inpDims) : _opData(std::move(opData)) { @@ -1422,9 +1424,9 @@ public: void exec(const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) override { std::shared_ptr ref_eltwise_injector = nullptr; - if (_opData.mkldnnAlgorithm != mkldnn::algorithm::undef) { + if (_opData.onednnAlgorithm != mkldnn::algorithm::undef) { ref_eltwise_injector = std::make_shared( - static_cast(_opData.mkldnnAlgorithm), _opData.alpha, _opData.beta, 1.f); + static_cast(_opData.onednnAlgorithm), _opData.alpha, _opData.beta, 1.f); } parallel_nt(0, [&](const int ithr, const int nthr) { @@ -1462,35 +1464,48 @@ public: float* dst_ptr_f = reinterpret_cast(args_ptrs.dst_ptr) + index_out; switch (_opData.algo) { - case EltwiseRelu: case EltwiseGelu: case EltwiseElu: case EltwiseTanh: case EltwiseSigmoid: case EltwiseAbs: - case EltwiseSqrt: case EltwiseSoftRelu: case EltwiseExp: case EltwiseClamp: - case EltwiseSwish: case EltwiseHswish: case EltwiseMish: case EltwiseHsigmoid: - case EltwiseRoundHalfToEven: case EltwiseRoundHalfAwayFromZero: - *dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break; - case EltwiseAdd: *dst_ptr_f = src_f[0] + src_f[1]; break; - case EltwiseMulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; - case 
EltwiseSubtract: *dst_ptr_f = src_f[0] - src_f[1]; break; - case EltwiseMultiply: *dst_ptr_f = src_f[0] * src_f[1]; break; - case EltwiseDivide: *dst_ptr_f = src_f[0] / src_f[1]; break; - case EltwiseFloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break; - case EltwiseMod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; - case EltwiseMaximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; - case EltwiseMinimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; - case EltwiseSquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; - case EltwisePowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; - case EltwiseEqual: *dst_ptr_f = src_f[0] == src_f[1]; break; - case EltwiseNotEqual: *dst_ptr_f = src_f[0] != src_f[1]; break; - case EltwiseGreater: *dst_ptr_f = src_f[0] > src_f[1]; break; - case EltwiseGreaterEqual: *dst_ptr_f = src_f[0] >= src_f[1]; break; - case EltwiseLess: *dst_ptr_f = src_f[0] < src_f[1]; break; - case EltwiseLessEqual: *dst_ptr_f = src_f[0] <= src_f[1]; break; - case EltwiseLogicalAnd: *dst_ptr_f = src_f[0] && src_f[1]; break; - case EltwiseLogicalOr: *dst_ptr_f = src_f[0] || src_f[1]; break; - case EltwiseLogicalXor: *dst_ptr_f = (src_f[0] || src_f[1]) - (src_f[0] && src_f[1]); break; - case EltwiseLogicalNot: *dst_ptr_f = !src_f[0]; break; - case EltwisePowerStatic: *dst_ptr_f = powf(_opData.beta * src_f[0] + _opData.gamma, _opData.alpha); break; - case EltwisePrelu: *dst_ptr_f = src_f[0] > 0 ? src_f[0] : src_f[0] * src_f[1]; break; - case EltwiseErf: *dst_ptr_f = std::erf(src_f[0]); break; + case Algorithm::EltwiseRelu: + case Algorithm::EltwiseGelu: + case Algorithm::EltwiseElu: + case Algorithm::EltwiseTanh: + case Algorithm::EltwiseSigmoid: + case Algorithm::EltwiseAbs: + case Algorithm::EltwiseSqrt: + case Algorithm::EltwiseSoftRelu: + case Algorithm::EltwiseExp: + case Algorithm::EltwiseClamp: + case Algorithm::EltwiseSwish: + case Algorithm::EltwiseHswish: + case Algorithm::EltwiseMish: + case Algorithm::EltwiseHsigmoid: + case Algorithm::EltwiseRoundHalfToEven: + case Algorithm::EltwiseRoundHalfAwayFromZero: + *dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); + break; + case Algorithm::EltwiseAdd: *dst_ptr_f = src_f[0] + src_f[1]; break; + case Algorithm::EltwiseMulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; + case Algorithm::EltwiseSubtract: *dst_ptr_f = src_f[0] - src_f[1]; break; + case Algorithm::EltwiseMultiply: *dst_ptr_f = src_f[0] * src_f[1]; break; + case Algorithm::EltwiseDivide: *dst_ptr_f = src_f[0] / src_f[1]; break; + case Algorithm::EltwiseFloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break; + case Algorithm::EltwiseMod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; + case Algorithm::EltwiseMaximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; + case Algorithm::EltwiseMinimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; + case Algorithm::EltwiseSquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; + case Algorithm::EltwisePowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; + case Algorithm::EltwiseEqual: *dst_ptr_f = src_f[0] == src_f[1]; break; + case Algorithm::EltwiseNotEqual: *dst_ptr_f = src_f[0] != src_f[1]; break; + case Algorithm::EltwiseGreater: *dst_ptr_f = src_f[0] > src_f[1]; break; + case Algorithm::EltwiseGreaterEqual: *dst_ptr_f = src_f[0] >= src_f[1]; break; + case Algorithm::EltwiseLess: *dst_ptr_f = src_f[0] < src_f[1]; break; + case 
Algorithm::EltwiseLessEqual: *dst_ptr_f = src_f[0] <= src_f[1]; break; + case Algorithm::EltwiseLogicalAnd: *dst_ptr_f = src_f[0] && src_f[1]; break; + case Algorithm::EltwiseLogicalOr: *dst_ptr_f = src_f[0] || src_f[1]; break; + case Algorithm::EltwiseLogicalXor: *dst_ptr_f = (src_f[0] || src_f[1]) - (src_f[0] && src_f[1]); break; + case Algorithm::EltwiseLogicalNot: *dst_ptr_f = !src_f[0]; break; + case Algorithm::EltwisePowerStatic: *dst_ptr_f = powf(_opData.beta * src_f[0] + _opData.gamma, _opData.alpha); break; + case Algorithm::EltwisePrelu: *dst_ptr_f = src_f[0] > 0 ? src_f[0] : src_f[0] * src_f[1]; break; + case Algorithm::EltwiseErf: *dst_ptr_f = std::erf(src_f[0]); break; default: IE_THROW() << "Unsupported operation type for Eltwise executor"; } } @@ -1506,7 +1521,7 @@ public: } private: - const MKLDNNEltwiseNode::EltwiseData _opData; + const Eltwise::EltwiseData _opData; VectorDims _dims; VectorDims _src_offsets[MAX_ELTWISE_INPUTS]; VectorDims _dst_offsets; @@ -1517,16 +1532,16 @@ private: } // namespace -bool MKLDNNEltwiseNode::EltwiseData::operator==(const EltwiseData &rhs) const noexcept { +bool Eltwise::EltwiseData::operator==(const EltwiseData &rhs) const noexcept { return algo == rhs.algo && - mkldnnAlgorithm == rhs.mkldnnAlgorithm && + onednnAlgorithm == rhs.onednnAlgorithm && alpha == rhs.alpha && beta == rhs.beta && gamma == rhs.gamma; } -static MKLDNNEltwiseNode::executorPtr buildExecutor(const EltwiseKey& key) { - MKLDNNEltwiseNode::executorPtr execPtr; +static Eltwise::executorPtr buildExecutor(const EltwiseKey& key) { + Eltwise::executorPtr execPtr; if (key.useJit) { execPtr = std::make_shared(key.eltwise_data, key.ops_list, @@ -1545,7 +1560,7 @@ static MKLDNNEltwiseNode::executorPtr buildExecutor(const EltwiseKey& key) { return execPtr; } -bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Eltwise::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (initializers.find(op->get_type_info()) == initializers.end()) { errorMessage = "Doesn't support Eltwise algorithm: " + std::string(op->get_type_name()); @@ -1564,8 +1579,8 @@ bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache), broadcastingPolicy(Undefined) { +Eltwise::Eltwise(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache), broadcastingPolicy(Undefined) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -1573,24 +1588,56 @@ MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr& op, co initializers.at(op->get_type_info())(op, *this); } -size_t MKLDNNEltwiseNode::getOpInputsNum() const { +size_t Eltwise::getOpInputsNum() const { switch (getAlgorithm()) { - case EltwiseRelu: case EltwiseGelu: case EltwiseElu: case EltwiseTanh: case EltwiseSigmoid: case EltwiseAbs: case EltwiseSqrt: - case EltwiseSoftRelu: case EltwiseExp: case EltwiseClamp: case EltwiseErf: case EltwiseLogicalNot: case EltwisePowerStatic: - case EltwiseSwish: case EltwiseHswish: case EltwiseMish: case EltwiseHsigmoid: case EltwiseRoundHalfToEven: case EltwiseRoundHalfAwayFromZero: + case Algorithm::EltwiseRelu: + case Algorithm::EltwiseGelu: + case Algorithm::EltwiseElu: + case Algorithm::EltwiseTanh: + case Algorithm::EltwiseSigmoid: + case Algorithm::EltwiseAbs: + case Algorithm::EltwiseSqrt: + case 
Algorithm::EltwiseSoftRelu: + case Algorithm::EltwiseExp: + case Algorithm::EltwiseClamp: + case Algorithm::EltwiseErf: + case Algorithm::EltwiseLogicalNot: + case Algorithm::EltwisePowerStatic: + case Algorithm::EltwiseSwish: + case Algorithm::EltwiseHswish: + case Algorithm::EltwiseMish: + case Algorithm::EltwiseHsigmoid: + case Algorithm::EltwiseRoundHalfToEven: + case Algorithm::EltwiseRoundHalfAwayFromZero: return 1; - case EltwiseAdd: case EltwiseSubtract: case EltwiseMultiply: case EltwiseDivide: case EltwiseFloorMod: case EltwiseMod: case EltwiseMaximum: - case EltwiseMinimum: case EltwiseSquaredDifference: case EltwisePowerDynamic: case EltwiseEqual: case EltwiseNotEqual: case EltwiseGreater: - case EltwiseGreaterEqual: case EltwiseLess: case EltwiseLessEqual: case EltwiseLogicalAnd: case EltwiseLogicalOr: case EltwiseLogicalXor: - case EltwisePrelu: + case Algorithm::EltwiseAdd: + case Algorithm::EltwiseSubtract: + case Algorithm::EltwiseMultiply: + case Algorithm::EltwiseDivide: + case Algorithm::EltwiseFloorMod: + case Algorithm::EltwiseMod: + case Algorithm::EltwiseMaximum: + case Algorithm::EltwiseMinimum: + case Algorithm::EltwiseSquaredDifference: + case Algorithm::EltwisePowerDynamic: + case Algorithm::EltwiseEqual: + case Algorithm::EltwiseNotEqual: + case Algorithm::EltwiseGreater: + case Algorithm::EltwiseGreaterEqual: + case Algorithm::EltwiseLess: + case Algorithm::EltwiseLessEqual: + case Algorithm::EltwiseLogicalAnd: + case Algorithm::EltwiseLogicalOr: + case Algorithm::EltwiseLogicalXor: + case Algorithm::EltwisePrelu: return 2; - case EltwiseMulAdd: + case Algorithm::EltwiseMulAdd: return 3; default: IE_THROW() << "Unsupported operation for Eltwise node with name `" << getName() << "`."; } } -bool MKLDNNEltwiseNode::isWithBroadcast() { +bool Eltwise::isWithBroadcast() { const auto& oDims = getOutputShapeAtPort(0).getDims(); for (size_t i = 0; i < inputShapes.size(); i++) { const auto& iDims = getInputShapeAtPort(i).getDims(); @@ -1602,14 +1649,14 @@ bool MKLDNNEltwiseNode::isWithBroadcast() { return false; } -void MKLDNNEltwiseNode::getSupportedDescriptors() { +void Eltwise::getSupportedDescriptors() { if (getParentEdges().size() < 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); } -void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { +void Eltwise::initSupportedPrimitiveDescriptors() { std::vector supportedPrecisions = { Precision::FP32, Precision::U8, @@ -1632,7 +1679,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { size_t expectedInputsNum = getOpInputsNum(); for (auto& postOp : fusedWith) { - auto* eltwiseNode = dynamic_cast(postOp.get()); + auto* eltwiseNode = dynamic_cast(postOp.get()); if (eltwiseNode != nullptr) { expectedInputsNum += eltwiseNode->getOpInputsNum() - 1; } @@ -1651,7 +1698,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { } for (auto& fusedNode : fusedWith) { - if (fusedNode->getType() == Eltwise) { + if (fusedNode->getType() == Type::Eltwise) { for (int i = 0; i < fusedNode->getOriginalInputsNumber(); i++) { if (fusedNode->getFusingPort() != i) inputPrecisions.push_back(fusedNode->getOriginalInputPrecisionAtPort(i)); @@ -1698,10 +1745,10 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { // TODO: delete after new LPT (ngraph based) is merged // WA is needed to handle bug in LPT that produces wrong precision after average pooling (I8/U8 instead of FP32) - 
if ((getAlgorithm() == EltwiseMulAdd || getAlgorithm() == EltwisePowerStatic) && + if ((getAlgorithm() == Algorithm::EltwiseMulAdd || getAlgorithm() == Algorithm::EltwisePowerStatic) && (inputPrecisions[0] == Precision::U8 || inputPrecisions[0] == Precision::I8)) { auto parentNode = getParentEdgesAtPort(0)[0]->getParent(); - if (getParentEdgesAtPort(0)[0]->getParent()->getAlgorithm() == PoolingAvg) { + if (getParentEdgesAtPort(0)[0]->getParent()->getAlgorithm() == Algorithm::PoolingAvg) { inputPrecisions[0] = Precision::FP32; } } @@ -1835,7 +1882,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { currentInBlkDims.resize(inputNum); } -std::vector MKLDNNEltwiseNode::shapeInfer() const { +std::vector Eltwise::shapeInfer() const { ov::PartialShape outShape = getParentEdgesAtPort(0)[0]->getMemory().GetShape().toPartialShape(); for (size_t i = 1; i < getParentEdges().size(); i++) { ov::PartialShape::broadcast_merge_into(outShape, getParentEdgesAtPort(i)[0]->getMemory().GetShape().toPartialShape(), @@ -1856,7 +1903,7 @@ std::vector MKLDNNEltwiseNode::shapeInfer() const { return {outShape.get_shape()}; } -void MKLDNNEltwiseNode::prepareParams() { +void Eltwise::prepareParams() { if (memPtrs.empty()) { for (auto i = 0; i < inputNum; i++) memPtrs.push_back(getParentEdgeAt(i)->getMemoryPtr()); @@ -1914,19 +1961,19 @@ void MKLDNNEltwiseNode::prepareParams() { auto outPrc = getChildEdgeAt(0)->getMemory().getDesc().getPrecision(); - EltwiseData thisOp{getAlgorithm(), getMKLDNNAlgorithm(), getAlpha(), getBeta(), getGamma()}; + EltwiseData thisOp{getAlgorithm(), getOneDnnAlgorithm(), getAlpha(), getBeta(), getGamma()}; EltwiseKey key = {{thisOp}, {getType()}, currentOutBlkDims, outOrder, dims_in, inpPrc, outPrc, mkldnn::post_ops(), isDynBatchEnabled, canUseOptimizedImpl}; fqDataPtrs.clear(); for (const auto &node : fusedWith) { key.ops_list.push_back(node->getType()); - if (node->getType() == Eltwise) { - if (auto eltwise = std::dynamic_pointer_cast(node)) { - key.eltwise_data.push_back({eltwise->getAlgorithm(), eltwise->getMKLDNNAlgorithm(), eltwise->getAlpha(), + if (node->getType() == Type::Eltwise) { + if (auto eltwise = std::dynamic_pointer_cast(node)) { + key.eltwise_data.push_back({eltwise->getAlgorithm(), eltwise->getOneDnnAlgorithm(), eltwise->getAlpha(), eltwise->getBeta(), eltwise->getGamma()}); } - } else if (node->getType() == FakeQuantize) { + } else if (node->getType() == Type::FakeQuantize) { node->appendPostOps(key.postOps, {}, fqDataPtrs); } else { IE_THROW(Unexpected) << "Eltwise node with name '" << getName() << "' has unexpected fused op of type '" << node->getTypeStr() << "'"; @@ -1938,7 +1985,7 @@ void MKLDNNEltwiseNode::prepareParams() { execPtr = result.first; } -bool MKLDNNEltwiseNode::needPrepareParams() const { +bool Eltwise::needPrepareParams() const { for (size_t i = 0; i < getParentEdges().size(); i++) { if (getParentEdgesAtPort(i)[0]->getMemory().GetDescWithType()->getBlockDims() != currentInBlkDims[i]) return true; @@ -1946,11 +1993,11 @@ bool MKLDNNEltwiseNode::needPrepareParams() const { return false; } -void MKLDNNEltwiseNode::selectOptimalPrimitiveDescriptor() { +void Eltwise::selectOptimalPrimitiveDescriptor() { selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true); } -void MKLDNNEltwiseNode::execute(mkldnn::stream strm) { +void Eltwise::execute(mkldnn::stream strm) { if (execPtr) { jit_eltwise_call_args_ptrs args_ptrs = {}; auto batchDimIdx = execPtr->getBatchDimIdx(); @@ -1974,12 +2021,12 @@ void MKLDNNEltwiseNode::execute(mkldnn::stream 
strm) { } } -void MKLDNNEltwiseNode::executeDynamicImpl(mkldnn::stream strm) { +void Eltwise::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNEltwiseNode::setDynamicBatchLim(int lim) { - MKLDNNNode::setDynamicBatchLim(lim); +void Eltwise::setDynamicBatchLim(int lim) { + Node::setDynamicBatchLim(lim); ov::PartialShape outShape = getParentEdgesAtPort(0)[0]->getMemory().GetShape().toPartialShape(); if (!getParentEdgesAtPort(0)[0]->getParent()->isConstant()) { @@ -1996,12 +2043,12 @@ void MKLDNNEltwiseNode::setDynamicBatchLim(int lim) { } } -bool MKLDNNEltwiseNode::created() const { - return getType() == Eltwise; +bool Eltwise::created() const { + return getType() == Type::Eltwise; } -bool MKLDNNEltwiseNode::canBeInPlace() const { - if (getParentEdgesAtPort(0)[0]->getParent()->getType() == Input) { +bool Eltwise::canBeInPlace() const { + if (getParentEdgesAtPort(0)[0]->getParent()->getType() == Type::Input) { return false; } @@ -2011,7 +2058,7 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { return false; // WA to prevent memory corruption caused by inplace feature - if (parent->getType() == Concatenation) { + if (parent->getType() == Type::Concatenation) { for (auto& parentParentEdge : parent->getParentEdges()) { auto parentParent = parentParentEdge.lock()->getParent(); if (parentParent->getChildEdges().size() != 1) @@ -2023,23 +2070,32 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { return getInputShapeAtPort(0) == getOutputShapeAtPort(0); } -void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { +void Eltwise::fuseInto(NodePtr& parentNode) { // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. - specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd && + specialConvolutionAddFusing = (parentNode->getType() == Type::Convolution + || parentNode->getType() == Type::BinaryConvolution) + && getAlgorithm() == Algorithm::EltwiseAdd && dimsEqualWeak(getInputShapeAtPort(0).getDims(), getInputShapeAtPort(1).getDims()); if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { std::tie(scales, shifts) = getScalesAndShifts(parentNode.get()); - if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract, - EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) { + if ((parentNode->getType() == Type::FullyConnected + || parentNode->getType() == Type::MatMul) + && one_of(getAlgorithm(), Algorithm::EltwiseAdd, + Algorithm::EltwiseSubtract, + Algorithm::EltwiseMultiply, + Algorithm::EltwiseDivide, + Algorithm::EltwiseMulAdd, + Algorithm::EltwisePowerStatic, + Algorithm::EltwisePrelu)) { std::tie(scales, shifts) = getScalesAndShifts(parentNode.get()); } } - MKLDNNNode::fuseInto(parentNode); + Node::fuseInto(parentNode); } -void MKLDNNEltwiseNode::appendMemory(const std::vector &data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem) { +void Eltwise::appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { - memPtr.reset(new MKLDNNMemory(getEngine())); + memPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {data.size()}); memPtr->Create(memoryDesc, data.data()); @@ -2047,16 +2103,16 @@ void MKLDNNEltwiseNode::appendMemory(const std::vector &data, MKLDNNMemor } } -void MKLDNNEltwiseNode::appendMemory(const std::vector &data, MKLDNNMemoryPtr 
&memPtr, std::vector& postOpsMem) { +void Eltwise::appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem) { postOpsMem.push_back(data.data()); } template -void MKLDNNEltwiseNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void Eltwise::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' "; - if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { - switch (getMKLDNNAlgorithm()) { + if (getOneDnnAlgorithm() != mkldnn::algorithm::undef) { + switch (getOneDnnAlgorithm()) { case mkldnn::algorithm::eltwise_relu: case mkldnn::algorithm::eltwise_tanh: case mkldnn::algorithm::eltwise_elu: @@ -2077,7 +2133,7 @@ void MKLDNNEltwiseNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDim case mkldnn::algorithm::eltwise_hsigmoid: case mkldnn::algorithm::eltwise_round_half_to_even: case mkldnn::algorithm::eltwise_round_half_away_from_zero: - ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta()); + ops.append_eltwise(1.0, getOneDnnAlgorithm(), getAlpha(), getBeta()); break; default: IE_THROW() << errorPrefix << "as post operation is not supported"; } @@ -2121,15 +2177,15 @@ void MKLDNNEltwiseNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDim * for performance reasons */ switch (getAlgorithm()) { - case EltwiseAdd: - case EltwiseSubtract: - case EltwiseMultiply: - case EltwiseDivide: - case EltwiseMulAdd: - case EltwisePowerStatic: + case Algorithm::EltwiseAdd: + case Algorithm::EltwiseSubtract: + case Algorithm::EltwiseMultiply: + case Algorithm::EltwiseDivide: + case Algorithm::EltwiseMulAdd: + case Algorithm::EltwisePowerStatic: ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, offsets); break; - case EltwisePrelu: + case Algorithm::EltwisePrelu: ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, offsets); break; default: @@ -2140,19 +2196,19 @@ void MKLDNNEltwiseNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDim } } -void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void Eltwise::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { appendPostOpsImpl(ops, postOpDims, postOpsMem); } -void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void Eltwise::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { appendPostOpsImpl(ops, postOpDims, postOpsMem); } -void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem) { +void Eltwise::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem) { const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' as binary post op "; VectorDims broadcastBinaryShape(postOpDims.size(), 1); - auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector &data) { + auto appendBinary = [&](const mkldnn::algorithm alg, MemoryPtr &memPtr, const std::vector &data) { if (data.empty()) IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; if (broadcastingPolicy == Undefined) @@ -2163,7 +2219,7 @@ void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims ops.append_binary(alg, 
memoryDesc.getDnnlDesc()); if (!memPtr) { - memPtr.reset(new MKLDNNMemory(getEngine())); + memPtr.reset(new Memory(getEngine())); memPtr->Create(memoryDesc, &data[0]); binaryPostOpsMem.push_back(memPtr); @@ -2171,25 +2227,25 @@ void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims }; switch (getAlgorithm()) { - case EltwiseAdd: - case EltwiseSubtract: + case Algorithm::EltwiseAdd: + case Algorithm::EltwiseSubtract: appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts); break; - case EltwiseDivide: - case EltwiseMultiply: + case Algorithm::EltwiseDivide: + case Algorithm::EltwiseMultiply: appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales); break; - case EltwiseMulAdd: + case Algorithm::EltwiseMulAdd: appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales); appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts); break; - case EltwisePowerStatic: + case Algorithm::EltwisePowerStatic: if (beta != 1.0f) // Multiply if has scales appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales); if (gamma != 0.0f) // Add only if has shifts appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts); break; - case EltwisePrelu: + case Algorithm::EltwisePrelu: appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales); break; default: @@ -2197,13 +2253,13 @@ void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims } } -bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { - auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) { +bool Eltwise::canFuse(const NodePtr& node) const { + auto isSuitableNode = [this](const Eltwise* node) { // [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results // we disable its fusing otherwise there is no guarantee it will be executed it I32 // [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32 // (all should be handled via explicit convert operations) - if (node->getAlgorithm() == EltwiseDivide) { + if (node->getAlgorithm() == Algorithm::EltwiseDivide) { for (const auto &originalInputPrecision : getOriginalInputPrecisions()) { if (originalInputPrecision == Precision::I32) { return false; @@ -2222,15 +2278,23 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { } // FQ inputs with quantization parameters will be hided inside post_op object, so will not increase inputs number - size_t addedInputEdgesNum = node->getType() != FakeQuantize ? (node->getParentEdges().size() - 1) : 0; + size_t addedInputEdgesNum = node->getType() != Type::FakeQuantize ? (node->getParentEdges().size() - 1) : 0; if (getParentEdges().size() + addedInputEdgesNum > MAX_ELTWISE_INPUTS) return false; - if (node->getType() == Eltwise) { + if (node->getType() == Type::Eltwise) { if (node->getParentEdgesAtPort(0)[0]->getParent().get() != this) { // Eltwise jitter doesn't respect commutative property, so fusing is disabled in case it applied not for 0-th port. 
- if (one_of(node->getAlgorithm(), EltwiseSubtract, EltwiseDivide, EltwiseFloorMod, EltwiseMod, EltwisePowerDynamic, EltwiseGreater, - EltwiseGreaterEqual, EltwiseLess, EltwiseLessEqual, EltwiseMulAdd)) { + if (one_of(node->getAlgorithm(), Algorithm::EltwiseSubtract, + Algorithm::EltwiseDivide, + Algorithm::EltwiseFloorMod, + Algorithm::EltwiseMod, + Algorithm::EltwisePowerDynamic, + Algorithm::EltwiseGreater, + Algorithm::EltwiseGreaterEqual, + Algorithm::EltwiseLess, + Algorithm::EltwiseLessEqual, + Algorithm::EltwiseMulAdd)) { return false; } @@ -2250,24 +2314,26 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { return true; } - if (node->getType() == FakeQuantize) { - return node->getAlgorithm() != FQBinarization; + if (node->getType() == Type::FakeQuantize) { + return node->getAlgorithm() != Algorithm::FQBinarization; } return false; } -InferenceEngine::Precision MKLDNNEltwiseNode::getRuntimePrecision() const { +InferenceEngine::Precision Eltwise::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account for (size_t i = 0; i < getParentEdges().size(); i++) { auto parentEdge = getParentEdgeAt(i); - if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated && !parentEdge->getParent()->isConstant()) { - inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated && !parentEdge->getParent()->isConstant()) { + inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); } } return getMaxPrecision(inputPrecisions); } -REG_MKLDNN_PRIM_FOR(MKLDNNEltwiseNode, Eltwise); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.h b/src/plugins/intel_cpu/src/nodes/eltwise.h index a99d6047e51..5cde48b2991 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.h +++ b/src/plugins/intel_cpu/src/nodes/eltwise.h @@ -13,6 +13,7 @@ namespace ov { namespace intel_cpu { +namespace node { #define MAX_ELTWISE_INPUTS 7 #define MAX_ELTWISE_DIM_RANK 12 @@ -47,7 +48,7 @@ struct jit_eltwise_call_args_indexes { size_t indexes[MAX_ELTWISE_DIM_RANK]; }; -class MKLDNNEltwiseNode; +class Eltwise; struct jit_uni_eltwise_kernel { void (*ker_)(const jit_eltwise_call_args_ptrs*, const jit_eltwise_call_args_indexes*); @@ -65,11 +66,11 @@ struct jit_uni_eltwise_kernel { jit_eltwise_params jep_; }; -class MKLDNNEltwiseNode : public MKLDNNNode { +class Eltwise : public Node { public: struct EltwiseData { Algorithm algo; - mkldnn::algorithm mkldnnAlgorithm; + mkldnn::algorithm onednnAlgorithm; float alpha; float beta; float gamma; @@ -89,7 +90,7 @@ public: using executorPtr = std::shared_ptr; public: - MKLDNNEltwiseNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Eltwise(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -97,18 +98,18 @@ public: void execute(mkldnn::stream strm) override; bool created() const override; bool canBeInPlace() const override; - bool canFuse(const MKLDNNNodePtr& node) const override; - void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) override; + bool canFuse(const NodePtr& node) const override; + void appendPostOps(mkldnn::post_ops& ops, const VectorDims 
&postOpDims, std::vector& postOpsMem) override; void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) override; - void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& binaryPostOpsMem) override; - void fuseInto(MKLDNNNodePtr& parentNode) override; + void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& binaryPostOpsMem) override; + void fuseInto(NodePtr& parentNode) override; InferenceEngine::Precision getRuntimePrecision() const override; float getAlpha() const { return alpha; } float getBeta() const { return beta; } float getGamma() const { return gamma; } - mkldnn::algorithm getMKLDNNAlgorithm() const { return mkldnnAlgorithm; } + mkldnn::algorithm getOneDnnAlgorithm() const { return onednnAlgorithm; } bool isWithBroadcast(); bool isSpecialConvolutionAddFusing() const { return specialConvolutionAddFusing; } @@ -135,7 +136,7 @@ private: executorPtr execPtr = nullptr; BroadcastingPolicy broadcastingPolicy; - mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef; + mkldnn::algorithm onednnAlgorithm = mkldnn::algorithm::undef; bool canUseOptimizedImpl = false; bool isDynBatchEnabled = false; @@ -153,17 +154,17 @@ private: std::vector scales = {}; std::vector shifts = {}; - MKLDNNMemoryPtr scalesMemory; - MKLDNNMemoryPtr shiftsMemory; + MemoryPtr scalesMemory; + MemoryPtr shiftsMemory; std::vector depthwiseData = {}; - MKLDNNMemoryPtr depthwiseMemory; + MemoryPtr depthwiseMemory; size_t depthwiseDataSize = 0; - std::vector memPtrs = {}; + std::vector memPtrs = {}; std::vector fqDataPtrs; - using Initializer = std::function&, MKLDNNEltwiseNode& node)>; + using Initializer = std::function&, Eltwise& node)>; static const std::map initializers; static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr& op); @@ -173,9 +174,10 @@ private: template void appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem); - void appendMemory(const std::vector &data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem); - void appendMemory(const std::vector &data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem); + void appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem); + void appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp index c29d19e9587..ca079637a10 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp @@ -8,10 +8,13 @@ #include "embedding_bag_offset_sum.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNEmbeddingBagOffsetSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool EmbeddingBagOffsetSum::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto embBagOffsetSumOp = ngraph::as_type_ptr(op); if (!embBagOffsetSumOp) { @@ -24,8 +27,8 @@ bool MKLDNNEmbeddingBagOffsetSumNode::isSupportedOperation(const std::shared_ptr return true; } -MKLDNNEmbeddingBagOffsetSumNode::MKLDNNEmbeddingBagOffsetSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : 
MKLDNNNode(op, eng, cache), MKLDNNEmbeddingBagSumNode(op, 3lu, 1lu, 4lu, 3lu) { +EmbeddingBagOffsetSum::EmbeddingBagOffsetSum(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache), EmbeddingBagSum(op, 3lu, 1lu, 4lu, 3lu) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -38,7 +41,7 @@ MKLDNNEmbeddingBagOffsetSumNode::MKLDNNEmbeddingBagOffsetSumNode(const std::shar IE_THROW() << "'" << _layerName << "' layer's offsets data has invalid rank."; } -void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { +void EmbeddingBagOffsetSum::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -70,13 +73,13 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } -void MKLDNNEmbeddingBagOffsetSumNode::prepareParams() { +void EmbeddingBagOffsetSum::prepareParams() { _indicesLen = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0]; _offsetsLen = getParentEdgesAtPort(OFFSETS_IDX)[0]->getMemory().getStaticDims()[0]; - MKLDNNEmbeddingBagSumNode::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims()); + EmbeddingBagSum::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims()); } -void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() { +void EmbeddingBagOffsetSum::initFromInputs() { indicesData_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); offsetsData_ = reinterpret_cast(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->GetPtr()); @@ -85,7 +88,7 @@ void MKLDNNEmbeddingBagOffsetSumNode::initFromInputs() { } } -void MKLDNNEmbeddingBagOffsetSumNode::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { +void EmbeddingBagOffsetSum::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { if (embIndex >= _offsetsLen) { IE_THROW() << "Invalid embedding bag index."; } @@ -118,15 +121,15 @@ void MKLDNNEmbeddingBagOffsetSumNode::getIndices(int embIndex, const int*& indic weightsIdx = offsetsData_[embIndex]; } -void MKLDNNEmbeddingBagOffsetSumNode::executeDynamicImpl(mkldnn::stream strm) { +void EmbeddingBagOffsetSum::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNEmbeddingBagOffsetSumNode::isExecutable() const { +bool EmbeddingBagOffsetSum::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { +void EmbeddingBagOffsetSum::execute(mkldnn::stream strm) { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); const uint8_t* weightsData = nullptr; @@ -134,12 +137,14 @@ void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), + EmbeddingBagSum::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), inputMem .getStaticDims(), getChildEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()); } -bool MKLDNNEmbeddingBagOffsetSumNode::created() 
const { - return getType() == EmbeddingBagOffsetsSum; +bool EmbeddingBagOffsetSum::created() const { + return getType() == Type::EmbeddingBagOffsetsSum; } -REG_MKLDNN_PRIM_FOR(MKLDNNEmbeddingBagOffsetSumNode, EmbeddingBagOffsetsSum) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h index 0140ccdd322..6e7e5dafec1 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNEmbeddingBagOffsetSumNode : public MKLDNNNode, public MKLDNNEmbeddingBagSumNode { +class EmbeddingBagOffsetSum : public Node, public EmbeddingBagSum { public: - MKLDNNEmbeddingBagOffsetSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + EmbeddingBagOffsetSum(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -44,5 +45,6 @@ private: size_t _offsetsLen = 0; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp index 75e62308fe6..5ba0b3e418e 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp @@ -8,10 +8,13 @@ #include "embedding_bag_packed_sum.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNEmbeddingBagPackedSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool EmbeddingBagPackedSum::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto embBagPackedSumOp = ngraph::as_type_ptr(op); if (!embBagPackedSumOp) { @@ -24,8 +27,8 @@ bool MKLDNNEmbeddingBagPackedSumNode::isSupportedOperation(const std::shared_ptr return true; } -MKLDNNEmbeddingBagPackedSumNode::MKLDNNEmbeddingBagPackedSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), MKLDNNEmbeddingBagSumNode(op, 2lu, 1lu, 2lu, 3lu) { +EmbeddingBagPackedSum::EmbeddingBagPackedSum(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache), EmbeddingBagSum(op, 2lu, 1lu, 2lu, 3lu) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -35,7 +38,7 @@ MKLDNNEmbeddingBagPackedSumNode::MKLDNNEmbeddingBagPackedSumNode(const std::shar IE_THROW() << "'" << _layerName << "' layer has indices data with invalid rank."; } -void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { +void EmbeddingBagPackedSum::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -64,17 +67,17 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } -void MKLDNNEmbeddingBagPackedSumNode::prepareParams() { +void EmbeddingBagPackedSum::prepareParams() { _batch = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[0]; 
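A side note on the enum usage visible in the created() checks above: the bare enumerators (EmbeddingBagOffsetsSum, EltwiseAdd, FQBinarization) become explicitly qualified (Type::..., Algorithm::...) throughout this patch. One reason such qualification is needed once the MKLDNN prefix is dropped is that a class name and a type enumerator can now coincide (the Eltwise class versus the Eltwise node type). A minimal standalone sketch of that situation, with hypothetical names that are not taken from the patch:

#include <cassert>

namespace demo {
// Scoped enum: the enumerator "Eltwise" does not leak into the enclosing scope,
// so it can coexist with a class of the same name after a prefix is dropped.
enum class Type { Eltwise, FakeQuantize };

class Eltwise {
public:
    Type getType() const { return Type::Eltwise; }
};
} // namespace demo

int main() {
    demo::Eltwise node;
    assert(node.getType() == demo::Type::Eltwise);  // unambiguous despite the shared name
    return 0;
}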
_indicesPerBag = getParentEdgesAtPort(INDICES_IDX)[0]->getMemory().getStaticDims()[1]; - MKLDNNEmbeddingBagSumNode::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims()); + EmbeddingBagSum::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims()); } -void MKLDNNEmbeddingBagPackedSumNode::initFromInputs() { +void EmbeddingBagPackedSum::initFromInputs() { _indices = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); } -void MKLDNNEmbeddingBagPackedSumNode::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { +void EmbeddingBagPackedSum::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { if (embIndex >= _batch * _indicesPerBag) IE_THROW() << "Invalid embedding bag index."; @@ -86,15 +89,15 @@ void MKLDNNEmbeddingBagPackedSumNode::getIndices(int embIndex, const int*& indic weightsIdx = embIndex * _indicesPerBag; } -void MKLDNNEmbeddingBagPackedSumNode::executeDynamicImpl(mkldnn::stream strm) { +void EmbeddingBagPackedSum::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNEmbeddingBagPackedSumNode::isExecutable() const { +bool EmbeddingBagPackedSum::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { +void EmbeddingBagPackedSum::execute(mkldnn::stream strm) { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); const uint8_t* weightsData = nullptr; @@ -102,12 +105,14 @@ void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), + EmbeddingBagSum::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), inputMem .getStaticDims(), getChildEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()); } -bool MKLDNNEmbeddingBagPackedSumNode::created() const { - return getType() == EmbeddingBagPackedSum; +bool EmbeddingBagPackedSum::created() const { + return getType() == Type::EmbeddingBagPackedSum; } -REG_MKLDNN_PRIM_FOR(MKLDNNEmbeddingBagPackedSumNode, EmbeddingBagPackedSum) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h index 8ebbcb19795..0c107c5b460 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNEmbeddingBagPackedSumNode : public MKLDNNNode, public MKLDNNEmbeddingBagSumNode { +class EmbeddingBagPackedSum : public Node, public EmbeddingBagSum { public: - MKLDNNEmbeddingBagPackedSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + EmbeddingBagPackedSum(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -39,5 +40,6 @@ private: size_t _indicesPerBag = 0; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git 
a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp index fd6325d79e3..a4f680cdb97 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp @@ -11,10 +11,13 @@ #include #include "common/cpu_memcpy.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode( +namespace ov { +namespace intel_cpu { +namespace node { + +EmbeddingBagSum::EmbeddingBagSum( const std::shared_ptr& op, size_t requiredInputNum, size_t indicesIdx, @@ -36,7 +39,7 @@ MKLDNNEmbeddingBagSumNode::MKLDNNEmbeddingBagSumNode( } } -void MKLDNNEmbeddingBagSumNode::prepareParams(const VectorDims& indexStaticShape) { +void EmbeddingBagSum::prepareParams(const VectorDims& indexStaticShape) { _embDepth = 1lu; for (size_t i = 1lu; i < indexStaticShape.size(); i++) { _embDepth *= indexStaticShape[i]; @@ -44,8 +47,8 @@ void MKLDNNEmbeddingBagSumNode::prepareParams(const VectorDims& indexStaticShape } template -void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsData, T* dstData, - const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims) { +void EmbeddingBagSum::processData(const T* srcData, const T* weightsData, T* dstData, + const InferenceEngine::SizeVector& inDataDims, const InferenceEngine::SizeVector& outDataDims) { std::string msgPrefix = std::string("Node EmbeddingBagSum with name '") + _layerName + "' "; initFromInputs(); @@ -115,8 +118,8 @@ void MKLDNNEmbeddingBagSumNode::processData(const T* srcData, const T* weightsDa parallel_nt(0, threadBody); } -void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, - const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims) { +void EmbeddingBagSum::execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, + const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims) { switch (srcPrc) { case Precision::FP32: { return processData::value_type>(reinterpret_cast(srcData), @@ -139,3 +142,7 @@ void MKLDNNEmbeddingBagSumNode::execute(const uint8_t* srcData, const uint8_t* w } } } + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.h index 0cd9c57fc88..73492167bab 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNEmbeddingBagSumNode { +class EmbeddingBagSum { public: - MKLDNNEmbeddingBagSumNode( + EmbeddingBagSum( const std::shared_ptr&, size_t requiredInputsNum, size_t indicesIdx, @@ -25,7 +26,7 @@ public: void execute(const uint8_t* srcData, const uint8_t* weightsData, uint8_t* dstData, const InferenceEngine::Precision &srcPrc, const InferenceEngine::SizeVector& inDims, const InferenceEngine::SizeVector& outDims); - ~MKLDNNEmbeddingBagSumNode() = default; + ~EmbeddingBagSum() = default; protected: virtual void initFromInputs() = 0; @@ -52,5 +53,6 @@ protected: std::string _layerName; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp 
b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp index 9c846538f9b..38601c30fe2 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp @@ -8,10 +8,13 @@ #include "embedding_segments_sum.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool EmbeddingSegmentsSum::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto embBagSegSumOp = ngraph::as_type_ptr(op); if (!embBagSegSumOp) { @@ -24,8 +27,8 @@ bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr< return true; } -MKLDNNEmbeddingSegmentsSumNode::MKLDNNEmbeddingSegmentsSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), MKLDNNEmbeddingBagSumNode(op, 4lu, 1lu, 5lu, 4lu) { +EmbeddingSegmentsSum::EmbeddingSegmentsSum(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache), EmbeddingBagSum(op, 4lu, 1lu, 5lu, 4lu) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -41,7 +44,7 @@ MKLDNNEmbeddingSegmentsSumNode::MKLDNNEmbeddingSegmentsSumNode(const std::shared << getInputShapeAtPort(SEGMENT_ID_IDX).getRank(); } -void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { +void EmbeddingSegmentsSum::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -74,11 +77,11 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); } -void MKLDNNEmbeddingSegmentsSumNode::prepareParams() { - MKLDNNEmbeddingBagSumNode::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims()); +void EmbeddingSegmentsSum::prepareParams() { + EmbeddingBagSum::prepareParams(getParentEdgesAtPort(EMB_TABLE_IDX)[0]->getMemory().getStaticDims()); } -void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { +void EmbeddingSegmentsSum::initFromInputs() { indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); indicesSize_ = getParentEdgeAt(INDICES_IDX)->getMemory().GetShape().getElementsCount(); @@ -93,7 +96,7 @@ void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { } } -void MKLDNNEmbeddingSegmentsSumNode::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { +void EmbeddingSegmentsSum::getIndices(int embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { if (embIndex >= numSegments_) IE_THROW() << "Invalid embedding bag index."; @@ -121,19 +124,19 @@ void MKLDNNEmbeddingSegmentsSumNode::getIndices(int embIndex, const int*& indice } } -std::vector MKLDNNEmbeddingSegmentsSumNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(NUM_SEGMENTS_IDX)); +std::vector EmbeddingSegmentsSum::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(NUM_SEGMENTS_IDX)); } -void MKLDNNEmbeddingSegmentsSumNode::executeDynamicImpl(mkldnn::stream strm) { +void EmbeddingSegmentsSum::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNEmbeddingSegmentsSumNode::isExecutable() const { +bool 
EmbeddingSegmentsSum::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { +void EmbeddingSegmentsSum::execute(mkldnn::stream strm) { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); const uint8_t* weightsData = nullptr; @@ -141,12 +144,14 @@ void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), + EmbeddingBagSum::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), inputMem .getStaticDims(), getChildEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()); } -bool MKLDNNEmbeddingSegmentsSumNode::created() const { - return getType() == EmbeddingSegmentsSum; +bool EmbeddingSegmentsSum::created() const { + return getType() == Type::EmbeddingSegmentsSum; } -REG_MKLDNN_PRIM_FOR(MKLDNNEmbeddingSegmentsSumNode, EmbeddingSegmentsSum) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h index 9ec3f38aae4..64e2c73213c 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h +++ b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNEmbeddingSegmentsSumNode : public MKLDNNNode, public MKLDNNEmbeddingBagSumNode { +class EmbeddingSegmentsSum : public Node, public EmbeddingBagSum { public: - MKLDNNEmbeddingSegmentsSumNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + EmbeddingSegmentsSum(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -47,5 +48,6 @@ private: size_t indicesSize_ = 0; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp index 93b4d5b3188..f71340a45e5 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp @@ -9,6 +9,11 @@ #include "ie_parallel.hpp" #include "experimental_detectron_detection_output.h" +using namespace InferenceEngine; + +namespace ov { +namespace intel_cpu { +namespace node { struct Indexer { const std::vector dims_; @@ -33,10 +38,6 @@ struct Indexer { } }; - -using namespace ov::intel_cpu; -using namespace InferenceEngine; - static void refine_boxes(const float* boxes, const float* deltas, const float* weights, const float* scores, float* refined_boxes, float* refined_boxes_areas, float* refined_scores, @@ -215,15 +216,15 @@ static void nms_cf(const float* conf_data, detections = (post_nms_topn == -1 ? 
detections : (std::min)(post_nms_topn, detections)); } -bool MKLDNNExperimentalDetectronDetectionOutputNode::needShapeInfer() const { +bool ExperimentalDetectronDetectionOutput::needShapeInfer() const { return false; } -bool MKLDNNExperimentalDetectronDetectionOutputNode::needPrepareParams() const { +bool ExperimentalDetectronDetectionOutput::needPrepareParams() const { return false; } -bool MKLDNNExperimentalDetectronDetectionOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool ExperimentalDetectronDetectionOutput::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto doOp = ngraph::as_type_ptr(op); if (!doOp) { @@ -236,9 +237,9 @@ bool MKLDNNExperimentalDetectronDetectionOutputNode::isSupportedOperation(const return true; } -MKLDNNExperimentalDetectronDetectionOutputNode::MKLDNNExperimentalDetectronDetectionOutputNode +ExperimentalDetectronDetectionOutput::ExperimentalDetectronDetectionOutput (const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -256,7 +257,7 @@ MKLDNNExperimentalDetectronDetectionOutputNode::MKLDNNExperimentalDetectronDetec deltas_weights_ = attributes.deltas_weights; } -void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescriptors() { +void ExperimentalDetectronDetectionOutput::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -272,7 +273,7 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr impl_desc_type::ref_any); } -void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) { +void ExperimentalDetectronDetectionOutput::execute(mkldnn::stream strm) { const int rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1])); assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getMemory().getStaticDims()[1])); @@ -369,8 +370,10 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm } } -bool MKLDNNExperimentalDetectronDetectionOutputNode::created() const { - return getType() == ExperimentalDetectronDetectionOutput; +bool ExperimentalDetectronDetectionOutput::created() const { + return getType() == Type::ExperimentalDetectronDetectionOutput; } -REG_MKLDNN_PRIM_FOR(MKLDNNExperimentalDetectronDetectionOutputNode, ExperimentalDetectronDetectionOutput) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h index 45e56cb7faf..e37cd2c9834 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNExperimentalDetectronDetectionOutputNode : public MKLDNNNode { +class ExperimentalDetectronDetectionOutput : public Node { public: - MKLDNNExperimentalDetectronDetectionOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ExperimentalDetectronDetectionOutput(const 
std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -44,6 +45,6 @@ private: std::vector deltas_weights_; }; +} // namespace node } // namespace intel_cpu } // namespace ov - diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp index 3712a8a0a4d..90a2e6a259b 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp @@ -19,7 +19,13 @@ #include "common/cpu_memcpy.h" #include "experimental_detectron_generate_proposals_single_image.h" +using namespace InferenceEngine; + +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct Indexer4d { int dim3_; int dim23_; @@ -34,13 +40,7 @@ struct Indexer4d { return i * dim123_ + j * dim23_ + k * dim3_ + n; } }; -} // namespace - -using namespace ov::intel_cpu; -using namespace InferenceEngine; - -static void refine_anchors(const float* deltas, const float* scores, const float* anchors, float* proposals, const int anchors_num, const int bottom_H, const int bottom_W, const float img_H, const float img_W, @@ -108,7 +108,7 @@ void refine_anchors(const float* deltas, const float* scores, const float* ancho }); } -static void unpack_boxes(const float* p_proposals, float* unpacked_boxes, int pre_nms_topn) { +void unpack_boxes(const float* p_proposals, float* unpacked_boxes, int pre_nms_topn) { parallel_for(pre_nms_topn, [&](size_t i) { unpacked_boxes[0*pre_nms_topn + i] = p_proposals[5*i + 0]; unpacked_boxes[1*pre_nms_topn + i] = p_proposals[5*i + 1]; @@ -118,7 +118,6 @@ static void unpack_boxes(const float* p_proposals, float* unpacked_boxes, int pr }); } -static void nms_cpu(const int num_boxes, int is_dead[], const float* boxes, int index_out[], int* const num_out, const int base_index, const float nms_thresh, const int max_num_out, @@ -242,8 +241,6 @@ void nms_cpu(const int num_boxes, int is_dead[], *num_out = count; } - -static void fill_output_blobs(const float* proposals, const int* roi_indices, float* rois, float* scores, const int num_proposals, const int num_rois, const int post_nms_topn) { @@ -272,7 +269,9 @@ void fill_output_blobs(const float* proposals, const int* roi_indices, } } -bool MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::isSupportedOperation +} // namespace + +bool ExperimentalDetectronGenerateProposalsSingleImage::isSupportedOperation (const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto proposalOp = ngraph::as_type_ptr(op); @@ -286,9 +285,9 @@ bool MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::isSupportedOpe return true; } -MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode +ExperimentalDetectronGenerateProposalsSingleImage::ExperimentalDetectronGenerateProposalsSingleImage (const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -307,7 +306,7 @@ MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::MKLDNNExperimentalD 
roi_indices_.resize(post_nms_topn_); } -void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::initSupportedPrimitiveDescriptors() { +void ExperimentalDetectronGenerateProposalsSingleImage::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -320,7 +319,7 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::initSupportedP impl_desc_type::ref_any); } -void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn::stream strm) { +void ExperimentalDetectronGenerateProposalsSingleImage::execute(mkldnn::stream strm) { try { if (inputShapes.size() != 4 || outputShapes.size() != 2) { IE_THROW() << "Incorrect number of input or output edges!"; @@ -423,16 +422,18 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn } } -bool MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::created() const { - return getType() == ExperimentalDetectronGenerateProposalsSingleImage; +bool ExperimentalDetectronGenerateProposalsSingleImage::created() const { + return getType() == Type::ExperimentalDetectronGenerateProposalsSingleImage; } -bool MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::needShapeInfer() const { +bool ExperimentalDetectronGenerateProposalsSingleImage::needShapeInfer() const { return false; } -bool MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::needPrepareParams() const { +bool ExperimentalDetectronGenerateProposalsSingleImage::needPrepareParams() const { return false; } -REG_MKLDNN_PRIM_FOR(MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode, ExperimentalDetectronGenerateProposalsSingleImage) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h index 8d77c34047e..cd3795faaf1 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.h @@ -9,11 +9,12 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode : public MKLDNNNode { +class ExperimentalDetectronGenerateProposalsSingleImage : public Node { public: - MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode(const std::shared_ptr& op, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ExperimentalDetectronGenerateProposalsSingleImage(const std::shared_ptr& op, + const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -48,5 +49,6 @@ private: std::vector roi_indices_; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp index d2eac948cf7..4732d3d7219 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp @@ -8,10 +8,13 @@ #include "ie_parallel.hpp" #include "experimental_detectron_priorgridgenerator.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNExperimentalDetectronPriorGridGeneratorNode::isSupportedOperation(const std::shared_ptr& op, 
+namespace ov { +namespace intel_cpu { +namespace node { + +bool ExperimentalDetectronPriorGridGenerator::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto priorGridGen = std::dynamic_pointer_cast(op); @@ -25,9 +28,9 @@ bool MKLDNNExperimentalDetectronPriorGridGeneratorNode::isSupportedOperation(con return true; } -MKLDNNExperimentalDetectronPriorGridGeneratorNode::MKLDNNExperimentalDetectronPriorGridGeneratorNode +ExperimentalDetectronPriorGridGenerator::ExperimentalDetectronPriorGridGenerator (const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -45,7 +48,7 @@ MKLDNNExperimentalDetectronPriorGridGeneratorNode::MKLDNNExperimentalDetectronPr stride_w_ = attr.stride_x; } -void MKLDNNExperimentalDetectronPriorGridGeneratorNode::initSupportedPrimitiveDescriptors() { +void ExperimentalDetectronPriorGridGenerator::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -56,7 +59,7 @@ void MKLDNNExperimentalDetectronPriorGridGeneratorNode::initSupportedPrimitiveDe impl_desc_type::ref_any); } -void MKLDNNExperimentalDetectronPriorGridGeneratorNode::execute(mkldnn::stream strm) { +void ExperimentalDetectronPriorGridGenerator::execute(mkldnn::stream strm) { const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getMemory().getStaticDims()[0]; assert(getParentEdgeAt(INPUT_PRIORS)->getMemory().getStaticDims()[1] == 4); @@ -82,12 +85,14 @@ void MKLDNNExperimentalDetectronPriorGridGeneratorNode::execute(mkldnn::stream s } } -bool MKLDNNExperimentalDetectronPriorGridGeneratorNode::created() const { - return getType() == ExperimentalDetectronPriorGridGenerator; +bool ExperimentalDetectronPriorGridGenerator::created() const { + return getType() == Type::ExperimentalDetectronPriorGridGenerator; } -bool MKLDNNExperimentalDetectronPriorGridGeneratorNode::needPrepareParams() const { +bool ExperimentalDetectronPriorGridGenerator::needPrepareParams() const { return false; } -REG_MKLDNN_PRIM_FOR(MKLDNNExperimentalDetectronPriorGridGeneratorNode, ExperimentalDetectronPriorGridGenerator) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h index 5c119aaec82..6b188cd4f37 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNExperimentalDetectronPriorGridGeneratorNode : public MKLDNNNode { +class ExperimentalDetectronPriorGridGenerator : public Node { public: - MKLDNNExperimentalDetectronPriorGridGeneratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ExperimentalDetectronPriorGridGenerator(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -45,5 +46,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git 
a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp index c0a704e40ea..bdd83f272f0 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp @@ -11,9 +11,11 @@ #include "common/cpu_memcpy.h" #include "experimental_detectron_roifeatureextractor.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { // implementation taken from Caffe2 @@ -308,7 +310,7 @@ void reorder_rois(const float *rois, const int* ids, int* mapping, const int roi } // namespace -bool MKLDNNExperimentalDetectronROIFeatureExtractorNode::isSupportedOperation(const std::shared_ptr& op, +bool ExperimentalDetectronROIFeatureExtractor::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto roiFeatureExtractor = std::dynamic_pointer_cast(op); @@ -322,9 +324,9 @@ bool MKLDNNExperimentalDetectronROIFeatureExtractorNode::isSupportedOperation(co return true; } -MKLDNNExperimentalDetectronROIFeatureExtractorNode::MKLDNNExperimentalDetectronROIFeatureExtractorNode +ExperimentalDetectronROIFeatureExtractor::ExperimentalDetectronROIFeatureExtractor (const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -340,7 +342,7 @@ MKLDNNExperimentalDetectronROIFeatureExtractorNode::MKLDNNExperimentalDetectronR pooled_width_ = output_dim_; } -void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveDescriptors() { +void ExperimentalDetectronROIFeatureExtractor::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -355,7 +357,7 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveD impl_desc_type::ref_any); } -void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream strm) { +void ExperimentalDetectronROIFeatureExtractor::execute(mkldnn::stream strm) { const int levels_num = inputShapes.size() - INPUT_FEATURES_START; const int num_rois = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getMemory().getStaticDims()[1]; @@ -409,8 +411,10 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream } } -bool MKLDNNExperimentalDetectronROIFeatureExtractorNode::created() const { - return getType() == ExperimentalDetectronROIFeatureExtractor; +bool ExperimentalDetectronROIFeatureExtractor::created() const { + return getType() == Type::ExperimentalDetectronROIFeatureExtractor; } -REG_MKLDNN_PRIM_FOR(MKLDNNExperimentalDetectronROIFeatureExtractorNode, ExperimentalDetectronROIFeatureExtractor) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h index a9b4ad7e9c6..82be8af0525 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.h @@ -9,10 +9,11 @@ namespace ov { namespace 
intel_cpu { +namespace node { -class MKLDNNExperimentalDetectronROIFeatureExtractorNode : public MKLDNNNode { +class ExperimentalDetectronROIFeatureExtractor : public Node { public: - MKLDNNExperimentalDetectronROIFeatureExtractorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ExperimentalDetectronROIFeatureExtractor(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -39,5 +40,6 @@ private: bool aligned_ = false; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp index c44a1b09b95..7675b20fa8d 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp @@ -11,10 +11,13 @@ #include "common/cpu_memcpy.h" #include "experimental_detectron_topkrois.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNExperimentalDetectronTopKROIsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool ExperimentalDetectronTopKROIs::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto topKROI = std::dynamic_pointer_cast(op); if (!topKROI) { @@ -27,8 +30,8 @@ bool MKLDNNExperimentalDetectronTopKROIsNode::isSupportedOperation(const std::sh return true; } -MKLDNNExperimentalDetectronTopKROIsNode::MKLDNNExperimentalDetectronTopKROIsNode(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ExperimentalDetectronTopKROIs::ExperimentalDetectronTopKROIs(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -49,7 +52,7 @@ MKLDNNExperimentalDetectronTopKROIsNode::MKLDNNExperimentalDetectronTopKROIsNode max_rois_num_ = topKROI->get_max_rois(); } -void MKLDNNExperimentalDetectronTopKROIsNode::initSupportedPrimitiveDescriptors() { +void ExperimentalDetectronTopKROIs::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -59,7 +62,7 @@ void MKLDNNExperimentalDetectronTopKROIsNode::initSupportedPrimitiveDescriptors( impl_desc_type::ref_any); } -void MKLDNNExperimentalDetectronTopKROIsNode::execute(mkldnn::stream strm) { +void ExperimentalDetectronTopKROIs::execute(mkldnn::stream strm) { const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); @@ -77,8 +80,10 @@ void MKLDNNExperimentalDetectronTopKROIsNode::execute(mkldnn::stream strm) { } } -bool MKLDNNExperimentalDetectronTopKROIsNode::created() const { - return getType() == ExperimentalDetectronTopKROIs; +bool ExperimentalDetectronTopKROIs::created() const { + return getType() == Type::ExperimentalDetectronTopKROIs; } -REG_MKLDNN_PRIM_FOR(MKLDNNExperimentalDetectronTopKROIsNode, ExperimentalDetectronTopKROIs) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h 
b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h index fcbddb5860b..c1b7b86726e 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNExperimentalDetectronTopKROIsNode : public MKLDNNNode { +class ExperimentalDetectronTopKROIs : public Node { public: - MKLDNNExperimentalDetectronTopKROIsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ExperimentalDetectronTopKROIs(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -41,5 +42,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index 645a2eefd80..25e16b68569 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -9,12 +9,10 @@ #include #include "ie_parallel.hpp" #include "extract_image_patches.h" -#include "list.hpp" #include #include "caseless.hpp" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; using details::CaselessEq; @@ -24,6 +22,10 @@ using namespace dnnl::impl::cpu::x64; using namespace dnnl::impl::utils; using namespace Xbyak; +namespace ov { +namespace intel_cpu { +namespace node { + #define GET_OFF(field) offsetof(jit_extract_image_patches_args, field) template @@ -269,7 +271,7 @@ private: } }; -bool MKLDNNExtractImagePatchesNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool ExtractImagePatches::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto extImgPatcher = ngraph::as_type_ptr(op); if (!extImgPatcher) { @@ -298,7 +300,7 @@ struct ExtractImagePatchesKey { VectorDims kSizes; VectorDims strides; VectorDims rates; - MKLDNNExtractImagePatchesNode::ExtImgPatcherPadType padType; + ExtractImagePatches::ExtImgPatcherPadType padType; size_t prcSize; size_t hash() const; bool operator==(const ExtractImagePatchesKey& rhs) const; @@ -325,8 +327,8 @@ bool ExtractImagePatchesKey::operator==(const ExtractImagePatchesKey& rhs) const } } // namespace -MKLDNNExtractImagePatchesNode::MKLDNNExtractImagePatchesNode(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -362,7 +364,7 @@ MKLDNNExtractImagePatchesNode::MKLDNNExtractImagePatchesNode(const std::shared_p IE_THROW() << errorPrefix << "must have the following attributes with shape {2}: sizes, strides, rates."; } -void MKLDNNExtractImagePatchesNode::prepareParams() { +void ExtractImagePatches::prepareParams() { const auto& srcMemPtr0 = getParentEdgeAt(0)->getMemoryPtr(); const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr0 || !srcMemPtr0->isAllocated()) @@ -401,7 +403,7 @@ void MKLDNNExtractImagePatchesNode::prepareParams() { execPtr = result.first; } -void 
MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() { +void ExtractImagePatches::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -414,7 +416,7 @@ void MKLDNNExtractImagePatchesNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { +void ExtractImagePatches::execute(mkldnn::stream strm) { if (execPtr) { auto src = getParentEdgeAt(0)->getMemoryPtr()->GetPtr(); auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr(); @@ -426,11 +428,11 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { } } -void MKLDNNExtractImagePatchesNode::executeDynamicImpl(mkldnn::stream strm) { +void ExtractImagePatches::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::executeReference( +void ExtractImagePatches::ExtractImagePatchesRefExecutor::executeReference( void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const { const char* src_data = reinterpret_cast(src); char* dst_data = reinterpret_cast(dst); @@ -477,7 +479,7 @@ void MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::executeRefer }); } -void MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::executeOptimizedGeneric( +void ExtractImagePatches::ExtractImagePatchesJitExecutor::executeOptimizedGeneric( void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) const { const char* src_data = reinterpret_cast(src); char* dst_data = reinterpret_cast(dst); @@ -507,7 +509,7 @@ void MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::executeOptim }); } -jit_extract_image_patches_params MKLDNNExtractImagePatchesNode::ExtractImagePatchesExecutor::fillJpp( +jit_extract_image_patches_params ExtractImagePatches::ExtractImagePatchesExecutor::fillJpp( const VectorDims& inDims, const VectorDims& outDims, const VectorDims& kSizes, @@ -575,7 +577,7 @@ jit_extract_image_patches_params MKLDNNExtractImagePatchesNode::ExtractImagePatc return jpp; } -MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::ExtractImagePatchesJitExecutor( +ExtractImagePatches::ExtractImagePatchesJitExecutor::ExtractImagePatchesJitExecutor( const VectorDims& inDims, const VectorDims& outDims, const VectorDims& kSizes, @@ -598,14 +600,14 @@ MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::ExtractImagePatch pKernel->create_ker(); } -void MKLDNNExtractImagePatchesNode::ExtractImagePatchesJitExecutor::exec( +void ExtractImagePatches::ExtractImagePatchesJitExecutor::exec( void* src, void* dst, const VectorDims& istrides, const VectorDims& ostrides) { if (!pKernel) IE_THROW() << "Can't execute, kernel for extract image patches node is not compiled"; executeOptimizedGeneric(src, dst, istrides, ostrides); } -MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::ExtractImagePatchesRefExecutor( +ExtractImagePatches::ExtractImagePatchesRefExecutor::ExtractImagePatchesRefExecutor( const VectorDims& inDims, const VectorDims& outDims, const VectorDims& kSizes, @@ -614,15 +616,17 @@ MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::ExtractImagePatch const ExtImgPatcherPadType& padType, const size_t prcSize) : jpp(fillJpp(inDims, outDims, kSizes, strides, rates, padType, prcSize)) {} -void MKLDNNExtractImagePatchesNode::ExtractImagePatchesRefExecutor::exec( +void ExtractImagePatches::ExtractImagePatchesRefExecutor::exec( void* src, void* dst, const 
VectorDims& istrides, const VectorDims& ostrides) { executeReference(src, dst, istrides, ostrides); } -const std::set MKLDNNExtractImagePatchesNode::_supported_precisions_sizes = {1, 2, 4}; +const std::set ExtractImagePatches::_supported_precisions_sizes = {1, 2, 4}; -bool MKLDNNExtractImagePatchesNode::created() const { - return getType() == ExtractImagePatches; +bool ExtractImagePatches::created() const { + return getType() == Type::ExtractImagePatches; } -REG_MKLDNN_PRIM_FOR(MKLDNNExtractImagePatchesNode, ExtractImagePatches) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.h b/src/plugins/intel_cpu/src/nodes/extract_image_patches.h index 6ec512a3246..87fb4d7554e 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.h +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.h @@ -12,6 +12,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_extract_image_patches_params { size_t IW; @@ -41,9 +42,9 @@ struct jit_uni_extract_image_patches_kernel { virtual ~jit_uni_extract_image_patches_kernel() {} }; -class MKLDNNExtractImagePatchesNode : public MKLDNNNode { +class ExtractImagePatches : public Node { public: - MKLDNNExtractImagePatchesNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ExtractImagePatches(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -128,5 +129,6 @@ private: }; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index dc7fcc988e8..c89f5e742ac 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" @@ -32,7 +32,6 @@ // #define FQ_DOUBLE_PRECISION using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace details; using namespace mkldnn::impl; @@ -40,6 +39,10 @@ using namespace mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; using namespace Xbyak; +namespace ov { +namespace intel_cpu { +namespace node { + #define GET_OFF(field) offsetof(jit_quantize_call_args, field) template @@ -222,7 +225,7 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ }; void generate() override { - do_dequantization = jqp_.op_type == FQCommon; + do_dequantization = jqp_.op_type == Algorithm::FQCommon; do_rounding = do_dequantization || jqp_.dst_prc == Precision::FP32; this->preamble(); @@ -825,7 +828,7 @@ private: } }; -bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool FakeQuantize::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto fq = std::dynamic_pointer_cast(op); if (!fq) { @@ -911,11 +914,11 @@ struct FakeQuantKey { }; } // namespace -MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +FakeQuantize::FakeQuantize(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if 
(isSupportedOperation(op, errorMessage)) { - algorithm = FQCommon; + algorithm = Algorithm::FQCommon; const auto fq = std::dynamic_pointer_cast(op); errorPrefix = "FakeQuantize node with name '" + getName() + "' "; @@ -1022,7 +1025,7 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr MKLDNNFakeQuantizeNode::getDataFormats() const { +std::vector FakeQuantize::getDataFormats() const { // Special case for first FQ in the network const auto &dims = getInputShapeAtPort(0).getDims(); if (dims[getAxis()] == 3) { @@ -1186,7 +1189,7 @@ std::vector MKLDNNFakeQuantizeNode::getDataFormats() const { } } -void MKLDNNFakeQuantizeNode::init() { +void FakeQuantize::init() { if (binarization) { inputPrecision = Precision::FP32; outputPrecision = Precision::BIN; @@ -1202,7 +1205,7 @@ void MKLDNNFakeQuantizeNode::init() { } } -void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { +void FakeQuantize::getSupportedDescriptors() { if (getParentEdges().size() != 5) IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getParentEdges().size(); if (getChildEdges().empty()) @@ -1231,7 +1234,7 @@ void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { } } -void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { +void FakeQuantize::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -1284,7 +1287,7 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNFakeQuantizeNode::needPrepareParams() const { +bool FakeQuantize::needPrepareParams() const { auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU quantize node with name '" << getName() << "' doesn't have primitive descriptors."; @@ -1301,7 +1304,7 @@ bool MKLDNNFakeQuantizeNode::needPrepareParams() const { axisSize != currentAxisSize); } -void MKLDNNFakeQuantizeNode::prepareParams() { +void FakeQuantize::prepareParams() { const size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()]; const size_t newPaddedSize = rnd_up(axisSize, 16); IE_ASSERT(newPaddedSize != 0); @@ -1328,7 +1331,7 @@ void MKLDNNFakeQuantizeNode::prepareParams() { } if (internalBlobMemory.empty() || needUpdThr) { - auto binarizationThresholdsDataMem = std::make_shared(getEngine()); + auto binarizationThresholdsDataMem = std::make_shared(getEngine()); binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr()); if (internalBlobMemory.empty()) { internalBlobMemory.push_back(binarizationThresholdsDataMem); @@ -1338,7 +1341,7 @@ void MKLDNNFakeQuantizeNode::prepareParams() { } if (internalBlobMemory.size() == (numBinFqIntBlob - 1) || needUpdMask) { - auto binarizationMaskDataMem = std::make_shared(getEngine()); + auto binarizationMaskDataMem = std::make_shared(getEngine()); binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr()); if (internalBlobMemory.size() == (numBinFqIntBlob - 1)) { internalBlobMemory.push_back(binarizationMaskDataMem); @@ -1350,7 +1353,7 @@ void MKLDNNFakeQuantizeNode::prepareParams() { constexpr size_t numFqIntBlob = 6; auto pushInternalBlob = [&](std::vector& data, size_t idx) { - auto memory = std::make_shared(getEngine()); + auto memory = std::make_shared(getEngine()); bool needOverwrite = getInputShapeAtPort(0).getDims()[getAxis()] == Shape::UNDEFINED_DIM && data.size() == 1; if (needOverwrite) { memory->Create(weightsDataDesc); @@ -1414,7 +1417,7 @@ void MKLDNNFakeQuantizeNode::prepareParams() { } 
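// --- Illustrative sketch (not part of this patch) ---------------------------
// The hunks above replace MKLDNNMemory/MKLDNNMemoryPtr with the renamed
// Memory/MemoryPtr types while keeping the Create(desc, data) usage intact.
// A minimal use of the renamed API, mirroring the pushInternalBlob pattern in
// prepareParams() above (weightsDataDesc and hostData are placeholders):
//
//   MemoryPtr blob = std::make_shared<Memory>(getEngine());
//   blob->Create(weightsDataDesc, hostData);   // same semantics as before the rename
//   internalBlobMemory.push_back(blob);
// -----------------------------------------------------------------------------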
} -void MKLDNNFakeQuantizeNode::executeReference() { +void FakeQuantize::executeReference() { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1531,7 +1534,7 @@ void MKLDNNFakeQuantizeNode::executeReference() { } } -void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr &pKernel) const { +void FakeQuantize::executeBinarization(const std::unique_ptr &pKernel) const { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1571,7 +1574,7 @@ void MKLDNNFakeQuantizeNode::executeBinarization(const std::unique_ptr &pKernel) const { +void FakeQuantize::executeQuantization(const std::unique_ptr &pKernel) const { auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); @@ -1702,11 +1705,11 @@ void MKLDNNFakeQuantizeNode::executeQuantization(const std::unique_ptr 1 ? 1 : 0]; const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment); if (!isPostOpDataInitialized) { @@ -1763,11 +1766,11 @@ void MKLDNNFakeQuantizeNode::initializePostOpData(const VectorDims &dims, const isPostOpDataInitialized = true; } -void MKLDNNFakeQuantizeNode::initializePostOpDataLegacy(const VectorDims &dims, const size_t bufferAlignment) { +void FakeQuantize::initializePostOpDataLegacy(const VectorDims &dims, const size_t bufferAlignment) { if (isPostOpDataInitialized) return; - if (getAlgorithm() == FQBinarization) { + if (getAlgorithm() == Algorithm::FQBinarization) { const auto realAxisSize = dims[dims.size() > 1 ? 1 : 0]; const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment); if (!isPostOpDataInitialized) { @@ -1799,9 +1802,9 @@ void MKLDNNFakeQuantizeNode::initializePostOpDataLegacy(const VectorDims &dims, isPostOpDataInitialized = true; } -void MKLDNNFakeQuantizeNode::appendMemory(const size_t dataSize, const void *data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem) { +void FakeQuantize::appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { - memPtr.reset(new MKLDNNMemory(getEngine())); + memPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {dataSize}); memPtr->Create(memoryDesc, data); @@ -1809,24 +1812,24 @@ void MKLDNNFakeQuantizeNode::appendMemory(const size_t dataSize, const void *dat } } -void MKLDNNFakeQuantizeNode::appendMemory(const size_t dataSize, const void *data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem) { +void FakeQuantize::appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem) { postOpsMem.push_back(data); } template -void MKLDNNFakeQuantizeNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { - // MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16 +void FakeQuantize::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { + // oneDNN quantization_injectors assumes that quantization data memory is always aligned on 16 // by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations. // Otherwise it can lead to buffer over-read and performance penalties due to denormals. 
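// Worked example of the padding described above (illustrative, not part of
// this patch): with 21 channels on the quantization axis, rnd_up(21, 16) == 32,
// so the per-channel crop/scale/shift buffers are sized for 32 floats; a full
// AVX-512 load (16 floats) starting at element 16 then stays inside the buffer
// instead of over-reading past element 20.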
const size_t bufferAlignment = 16; initializePostOpDataLegacy(postOpDims, bufferAlignment); - if (getAlgorithm() == FQBinarization) { + if (getAlgorithm() == Algorithm::FQBinarization) { ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]); } else { - mkldnn::algorithm alg = getAlgorithm() == FQCommon ? mkldnn::algorithm::quantization_quantize_dequantize : - mkldnn::algorithm::quantization_quantize; + mkldnn::algorithm alg = getAlgorithm() == Algorithm::FQCommon ? mkldnn::algorithm::quantization_quantize_dequantize : + mkldnn::algorithm::quantization_quantize; std::array per_channel = {cropLowSize > 1, cropHighSize > 1, inputScaleSize > 1, inputShiftSize > 1, outputScaleSize > 1, outputShiftSize > 1}; @@ -1852,35 +1855,35 @@ void MKLDNNFakeQuantizeNode::appendPostOpsImpl(mkldnn::post_ops& ops, const Vect } } -void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void FakeQuantize::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { appendPostOpsImpl(ops, postOpDims, postOpsMem); } -void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { +void FakeQuantize::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) { appendPostOpsImpl(ops, postOpDims, postOpsMem); } -void MKLDNNFakeQuantizeNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem) { +void FakeQuantize::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem) { static const size_t bufferAlignment = 1; initializePostOpData(postOpDims, bufferAlignment); VectorDims broadcastBinaryShape(postOpDims.size(), 1); - auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) { + auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MemoryPtr &memPtr, const void *data) { DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, dataSize == 1 ? Shape(broadcastBinaryShape) : Shape(postOpDims)); ops.append_binary(alg, memoryDesc.getDnnlDesc()); if (!memPtr) { - memPtr.reset(new MKLDNNMemory(getEngine())); + memPtr.reset(new Memory(getEngine())); memPtr->Create(memoryDesc, data); binaryPostOpsMem.push_back(memPtr); } }; - mkldnn::algorithm alg = getAlgorithm() == FQCommon ? mkldnn::algorithm::quantization_quantize_dequantize : - mkldnn::algorithm::quantization_quantize; + mkldnn::algorithm alg = getAlgorithm() == Algorithm::FQCommon ? 
mkldnn::algorithm::quantization_quantize_dequantize : + mkldnn::algorithm::quantization_quantize; appendBinary(mkldnn::algorithm::binary_min, cropHighSize, cropHighMemory, &cropHighData.shifts_[0]); appendBinary(mkldnn::algorithm::binary_max, cropLowSize, cropLowMemory, &cropLowData.shifts_[0]); @@ -1893,8 +1896,8 @@ void MKLDNNFakeQuantizeNode::appendBinPostOps(mkldnn::post_ops& ops, const Vecto appendBinary(mkldnn::algorithm::binary_add, outputShiftSize, outputShiftMemory, &outputShiftData.shifts_[0]); } -MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) { - bool isBinarization = _jqp.op_type == FQBinarization; +FakeQuantize::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) { + bool isBinarization = _jqp.op_type == Algorithm::FQBinarization; if (mayiuse(cpu::x64::avx512_common)) { if (isBinarization) pKernel.reset(new jit_uni_binarization_kernel(_jqp)); @@ -1918,19 +1921,21 @@ MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const j } } -void MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::exec(const MKLDNNFakeQuantizeNode& node) { +void FakeQuantize::FakeQuantizeJitExecutor::exec(const FakeQuantize& node) { if (!pKernel) IE_THROW() << "Can't execute, kernel for fake quantize node is not compiled"; - if (pKernel->jqp_.op_type == FQBinarization) { + if (pKernel->jqp_.op_type == Algorithm::FQBinarization) { node.executeBinarization(pKernel); } else { node.executeQuantization(pKernel); } } -bool MKLDNNFakeQuantizeNode::created() const { - return getType() == FakeQuantize; +bool FakeQuantize::created() const { + return getType() == Type::FakeQuantize; } -REG_MKLDNN_PRIM_FOR(MKLDNNFakeQuantizeNode, FakeQuantize); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.h b/src/plugins/intel_cpu/src/nodes/fake_quantize.h index 01fc9c6e681..bb1a890309c 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.h +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.h @@ -15,6 +15,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_quantize_params { int c; @@ -65,9 +66,9 @@ struct jit_uni_quantize_kernel { jit_quantize_params jqp_; }; -class MKLDNNFakeQuantizeNode : public MKLDNNNode { +class FakeQuantize : public Node { public: - MKLDNNFakeQuantizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + FakeQuantize(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void initSupportedPrimitiveDescriptors() override; void getSupportedDescriptors() override; @@ -121,9 +122,9 @@ public: InferenceEngine::Precision getInputPrecision() const { return inputPrecision; } InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; } - void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) override; + void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) override; void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem) override; - void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& binaryPostOpsMem) override; + void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& binaryPostOpsMem) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -135,16 +136,16 @@ public: 
BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; } - MKLDNNMemoryPtr cropLowMemory; - MKLDNNMemoryPtr cropHighMemory; - MKLDNNMemoryPtr inputScaleMemory; - MKLDNNMemoryPtr inputShiftMemory; - MKLDNNMemoryPtr outputScaleMemory; - MKLDNNMemoryPtr outputShiftMemory; + MemoryPtr cropLowMemory; + MemoryPtr cropHighMemory; + MemoryPtr inputScaleMemory; + MemoryPtr inputShiftMemory; + MemoryPtr outputScaleMemory; + MemoryPtr outputShiftMemory; private: struct FakeQuantizeExecutor { - virtual void exec(const MKLDNNFakeQuantizeNode& node) = 0; + virtual void exec(const FakeQuantize& node) = 0; virtual ~FakeQuantizeExecutor() = default; }; using executorPtr = std::shared_ptr; @@ -152,7 +153,7 @@ private: struct FakeQuantizeJitExecutor : public FakeQuantizeExecutor { FakeQuantizeJitExecutor(const jit_quantize_params &_jqp); - void exec(const MKLDNNFakeQuantizeNode& node) override; + void exec(const FakeQuantize& node) override; std::unique_ptr pKernel; }; @@ -164,8 +165,8 @@ private: void executeBinarization(const std::unique_ptr &pKernel) const; void executeQuantization(const std::unique_ptr &pKernel) const; - void appendMemory(const size_t dataSize, const void *data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem); - void appendMemory(const size_t dataSize, const void *data, MKLDNNMemoryPtr &memPtr, std::vector& postOpsMem); + void appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem); + void appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem); template void appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& postOpsMem); @@ -185,7 +186,7 @@ private: std::vector quantizationData; size_t quantizationDataSize = 0lu; - MKLDNNMemoryPtr quantizationMemory; + MemoryPtr quantizationMemory; size_t cropLowSize; size_t cropHighSize; @@ -219,5 +220,6 @@ private: BroadcastingPolicy broadcastingPolicy; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 87fc65755f5..abcedc70b45 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "utils/general_utils.h" #include @@ -18,9 +18,11 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { struct FCKey { @@ -73,7 +75,7 @@ bool FCKey::operator==(const FCKey &rhs) const { } // namespace -bool MKLDNNFullyConnectedNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool FullyConnected::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto fc = std::dynamic_pointer_cast(op); if (!fc) { @@ -101,8 +103,8 @@ bool MKLDNNFullyConnectedNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache), withBiases(false) { +FullyConnected::FullyConnected(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache), withBiases(false) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "FullyConnected node with name '" + getName() + "'"; @@ -113,7 +115,7 @@ 
MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const std::shared_ptr MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const Shape &dims) const { +std::vector FullyConnected::getAvailableFormatsForDims(const Shape &dims) const { if (dims.getRank() == 0) return {memory::format_tag::x}; else if (dims.getRank() == 1) @@ -129,7 +131,7 @@ std::vector MKLDNNFullyConnectedNode::getAvailableFormatsFor return {memory::format_tag::any}; } -VectorDims MKLDNNFullyConnectedNode::makeDummyInputDims() const { +VectorDims FullyConnected::makeDummyInputDims() const { const auto& inShape = getInputShapeAtPort(DATA_ID); const auto& weightDims = getInputShapeAtPort(WEIGHTS_ID).getStaticDims(); @@ -148,7 +150,7 @@ VectorDims MKLDNNFullyConnectedNode::makeDummyInputDims() const { return MemoryDescUtils::makeDummyShape(Shape(inMinDims, inMaxDims)).getStaticDims(); } -VectorDims MKLDNNFullyConnectedNode::makeDummyOutputDims(const VectorDims& inDims) const { +VectorDims FullyConnected::makeDummyOutputDims(const VectorDims& inDims) const { std::vector inShapes = {Shape(inDims), getInputShapeAtPort(WEIGHTS_ID)}; if (inputShapes.size() > 2) { inShapes.emplace_back(getInputShapeAtPort(BIAS_ID)); @@ -156,23 +158,23 @@ VectorDims MKLDNNFullyConnectedNode::makeDummyOutputDims(const VectorDims& inDim return shapeInferGeneric(inShapes).front(); } -void MKLDNNFullyConnectedNode::getSupportedDescriptors() { +void FullyConnected::getSupportedDescriptors() { if (getParentEdges().size() != 2 && getParentEdges().size() != 3) IE_THROW() << errorPrefix << " has incorrect number of input edges"; if (getChildEdges().empty()) IE_THROW()<< errorPrefix << " has incorrect number of output edges"; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(DATA_ID)); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(DATA_ID)); + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(DATA_ID)); + auto outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(DATA_ID)); if (inputDataType == memory::data_type::f32) { outputDataType = memory::data_type::f32; } if (!fusedWith.empty()) { - outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); + outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); } - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(WEIGHTS_ID)); + auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(WEIGHTS_ID)); // We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) @@ -194,14 +196,14 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { outDims = isDynamicNode() ? 
makeDummyOutputDims(inDims) : getOutputShapeAtPort(0).getStaticDims(); for (auto format : getAvailableFormatsForDims(getInputShapeAtPort(0))) { - auto in_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(inDims), inputDataType, format); - auto out_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(outDims), outputDataType, mkldnn::memory::format_tag::any); + auto in_candidate = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(inDims), inputDataType, format); + auto out_candidate = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(outDims), outputDataType, mkldnn::memory::format_tag::any); createDescriptorInternal(in_candidate, out_candidate); } } -void MKLDNNFullyConnectedNode::prepareParams() { +void FullyConnected::prepareParams() { auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); auto wghMemPtr = getParentEdgesAtPort(1)[0]->getMemoryPtr(); auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); @@ -211,7 +213,7 @@ void MKLDNNFullyConnectedNode::prepareParams() { IE_THROW() << "Input memory hasn't been allocated."; if (!wghMemPtr || !wghMemPtr->isAllocated()) IE_THROW() << "Weight memory hasn't been allocated."; - MKLDNNMemoryPtr biasMemPtr = nullptr; + MemoryPtr biasMemPtr = nullptr; if (withBiases) { biasMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr(); if (!biasMemPtr || !biasMemPtr->isAllocated()) @@ -271,7 +273,7 @@ void MKLDNNFullyConnectedNode::prepareParams() { key.inp1->getDnnlDesc(), outDesc); } - MKLDNNDescriptor desc(fcDsc); + DnnlDesriptor desc(fcDsc); primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine, key.attr); inner_product_forward::primitive_desc prim_desc; @@ -326,7 +328,7 @@ void MKLDNNFullyConnectedNode::prepareParams() { reshapeMemory(DNNL_ARG_DST); } -void MKLDNNFullyConnectedNode::setDynamicBatchLim(int lim) { +void FullyConnected::setDynamicBatchLim(int lim) { dynBatchLim = lim; auto setBatchPrimArgs = [this](int argType, const mkldnn::memory& oldMem) { @@ -347,7 +349,7 @@ void MKLDNNFullyConnectedNode::setDynamicBatchLim(int lim) { setBatchPrimArgs(DNNL_ARG_DST, getChildEdgesAtPort(0)[0]->getMemory().GetPrimitive()); } -void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { +void FullyConnected::execute(mkldnn::stream strm) { if (prim) { // in cases parameter -> FullyConnected or dynamic shapes // we keep old pointer to data in primArgs on second iteration with same input shapes @@ -370,15 +372,15 @@ void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { } } -void MKLDNNFullyConnectedNode::executeDynamicImpl(mkldnn::stream strm) { +void FullyConnected::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNFullyConnectedNode::canFuse(const MKLDNNNodePtr& node) const { +bool FullyConnected::canFuse(const NodePtr& node) const { return canFuseSimpleOperation(node); } -void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights) { +void FullyConnected::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights) { mkldnn::post_ops ops; auto getBinPostOpShape = [&](){ @@ -392,13 +394,13 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, const Ve }; for (auto &node : fusedWith) { - if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs); continue; } - if (auto* eltwiseNode = dynamic_cast(node.get())) { 
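// Illustrative note (not part of this patch): after the rename, post-op fusion
// in setPostOps() dispatches on the new node class names. A condensed sketch,
// assuming the cast targets are the renamed FakeQuantize and Eltwise classes
// shown in this hunk:
//
//   if (auto* fq = dynamic_cast<FakeQuantize*>(node.get())) {
//       fq->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs);
//   } else if (auto* eltwise = dynamic_cast<Eltwise*>(node.get())) {
//       if (eltwise->getOneDnnAlgorithm() != mkldnn::algorithm::undef)
//           eltwise->appendPostOps(ops, dims, postOpsArgs);      // regular oneDNN post-op
//       else
//           eltwise->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs);
//   }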
- if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { + if (eltwiseNode->getOneDnnAlgorithm() != mkldnn::algorithm::undef) { eltwiseNode->appendPostOps(ops, dims, postOpsArgs); } else { eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs); @@ -412,11 +414,11 @@ void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, const Ve attr.set_post_ops(ops); } -bool MKLDNNFullyConnectedNode::created() const { - return getType() == FullyConnected; +bool FullyConnected::created() const { + return getType() == Type::FullyConnected; } -const std::vector& MKLDNNFullyConnectedNode::getPrimitivesPriority() { +const std::vector& FullyConnected::getPrimitivesPriority() { std::vector priorities = { impl_desc_type::unknown, impl_desc_type::gemm_blas, @@ -460,7 +462,7 @@ const std::vector& MKLDNNFullyConnectedNode::getPrimitivesPriori return implPriorities; } -MKLDNNNode::AttrPtr MKLDNNFullyConnectedNode::initPrimitiveAttr() { +Node::AttrPtr FullyConnected::initPrimitiveAttr() { auto attr = std::make_shared(mkldnn::primitive_attr()); setPostOps(*attr, outDims); @@ -471,7 +473,7 @@ MKLDNNNode::AttrPtr MKLDNNFullyConnectedNode::initPrimitiveAttr() { // WA: creation DnnlMemoryDesc with format == any is prohibited // so we create mkldnn::memory::desc directly // we need specific method and can't remove createDescriptor from base class because its used into initDescriptor -void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, +void FullyConnected::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, const mkldnn::memory::desc &outputDesc) { auto in_candidate = inputDesc; auto out_candidate = outputDesc; @@ -483,42 +485,42 @@ void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::de } else if (in_candidate.data_type() == mkldnn::memory::data_type::u8 || in_candidate.data_type() == mkldnn::memory::data_type::s8) { wdt = memory::data_type::s8; if (withBiases) - bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(BIAS_ID)); + bdt = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(BIAS_ID)); } if (in_candidate.dims().size() == 3) { auto inDims = in_candidate.dims(); auto normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; in_candidate = mkldnn::memory::desc(normalizedInDims, in_candidate.data_type(), - MKLDNNExtensionUtils::GetPlainFormatByRank(normalizedInDims.size())); + DnnlExtensionUtils::GetPlainFormatByRank(normalizedInDims.size())); } if (out_candidate.dims().size() == 3) { auto outDims = out_candidate.dims(); auto normalizedOutDims = { outDims[0] * outDims[1], outDims[2] }; out_candidate = mkldnn::memory::desc(normalizedOutDims, out_candidate.data_type(), - MKLDNNExtensionUtils::GetPlainFormatByRank(normalizedOutDims.size())); + DnnlExtensionUtils::GetPlainFormatByRank(normalizedOutDims.size())); } - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getInputShapeAtPort(WEIGHTS_ID).getStaticDims()), + mkldnn::memory::desc wgh_candidate(DnnlExtensionUtils::convertToDnnlDims(getInputShapeAtPort(WEIGHTS_ID).getStaticDims()), wdt, mkldnn::memory::format_tag::any); if (withBiases) { - mkldnn::memory::desc bias_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getInputShapeAtPort(BIAS_ID).getStaticDims()), bdt, + mkldnn::memory::desc bias_candidate(DnnlExtensionUtils::convertToDnnlDims(getInputShapeAtPort(BIAS_ID).getStaticDims()), bdt, 
mkldnn::memory::format_tag::any); - MKLDNNDescriptor desc(std::shared_ptr( + DnnlDesriptor desc(std::shared_ptr( new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, bias_candidate, out_candidate))); descs.push_back(desc); } else { - MKLDNNDescriptor desc(std::shared_ptr( + DnnlDesriptor desc(std::shared_ptr( new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, out_candidate))); descs.push_back(desc); } } -void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, +void FullyConnected::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { MemoryDescPtr inpDesc; if (inputDesc[0]->isDefined()) { @@ -537,7 +539,7 @@ void MKLDNNFullyConnectedNode::createDescriptor(const std::vector MemoryDescUtils::convertToDnnlMemoryDesc(outDesc)->getDnnlDesc()); } -void MKLDNNFullyConnectedNode::initSupportedPrimitiveDescriptors() { +void FullyConnected::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -586,48 +588,50 @@ void MKLDNNFullyConnectedNode::initSupportedPrimitiveDescriptors() { } } -std::shared_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr FullyConnected::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx); if (getInputShapeAtPort(idx).getRank() == 3) { - return std::make_shared(MKLDNNExtensionUtils::DataTypeToIEPrecision( + return std::make_shared(DnnlExtensionUtils::DataTypeToIEPrecision( static_cast(desc.data.data_type)), getInputShapeAtPort(idx)); } if (getInputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(desc); + return DnnlExtensionUtils::makeDescriptor(desc); } -std::shared_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr FullyConnected::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = primitive_desc_it.dst_desc(idx); if (getOutputShapeAtPort(idx).getRank() == 3) { - return std::make_shared(MKLDNNExtensionUtils::DataTypeToIEPrecision( + return std::make_shared(DnnlExtensionUtils::DataTypeToIEPrecision( static_cast(desc.data.data_type)), getOutputShapeAtPort(idx)); } if (getOutputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(desc); + return DnnlExtensionUtils::makeDescriptor(desc); } -InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { +InferenceEngine::Precision FullyConnected::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account size_t inputsNumLimit = 2; for (size_t i = 0; i < std::min(getParentEdges().size(), inputsNumLimit); i++) { auto parentEdge = getParentEdgeAt(i); - if (parentEdge && parentEdge->getStatus() == MKLDNNEdge::Status::Validated) { - inputPrecisions.emplace_back(MKLDNNExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); + if (parentEdge && parentEdge->getStatus() == Edge::Status::Validated) { + 
inputPrecisions.emplace_back(DnnlExtensionUtils::DataTypeToIEPrecision((parentEdge->getMemoryPtr()->GetDataType()))); } } return getMaxPrecision(inputPrecisions); } -REG_MKLDNN_PRIM_FOR(MKLDNNFullyConnectedNode, FullyConnected); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index dd08139b201..48dc06f89b9 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNFullyConnectedNode : public MKLDNNNode { +class FullyConnected : public Node { public: - MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + FullyConnected(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; @@ -34,7 +35,7 @@ public: void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; - size_t descInputNumbers(MKLDNNDescriptor desc) override { + size_t descInputNumbers(DnnlDesriptor desc) override { return static_cast(getOriginalInputsNumber()); } @@ -44,7 +45,7 @@ public: InferenceEngine::Precision getRuntimePrecision() const override; - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -75,5 +76,6 @@ private: static const size_t BIAS_ID = 2; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index da47211a526..8d179e47f66 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -12,13 +12,16 @@ #include #include "kernels/gather_uni_kernel.hpp" -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl::cpu; #define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' " -bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Gather::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v7::Gather::get_type_info_static(), @@ -38,8 +41,8 @@ bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), batchDims(0) { +Gather::Gather(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache), batchDims(0) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -89,7 +92,7 @@ MKLDNNGatherNode::MKLDNNGatherNode(const std::shared_ptr& op, const mk } } -void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { +void Gather::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -129,7 +132,7 @@ void MKLDNNGatherNode::initSupportedPrimitiveDescriptors() { isDynamicNode()); } -void MKLDNNGatherNode::createPrimitive() { +void Gather::createPrimitive() { uint64_t idxElPerVec = 1; if 
(!isDynamicNode()) { idxElPerVec = x64::mayiuse(x64::avx512_common) ? x64::cpu_isa_traits::vlen / idxTypeSize : @@ -195,17 +198,17 @@ void MKLDNNGatherNode::createPrimitive() { } } - MKLDNNNode::createPrimitive(); + Node::createPrimitive(); } -bool MKLDNNGatherNode::needPrepareParams() const { +bool Gather::needPrepareParams() const { bool result = inputShapesModified(); if (!isAxisInputConst) result = result || axis != (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0]; return result; } -void MKLDNNGatherNode::prepareParams() { +void Gather::prepareParams() { auto& dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) THROW_ERROR << " has not allocated input data memory."; @@ -260,7 +263,7 @@ void MKLDNNGatherNode::prepareParams() { } } -void MKLDNNGatherNode::execute(mkldnn::stream strm) { +void Gather::execute(mkldnn::stream strm) { if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) { const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr(); const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr(); @@ -313,7 +316,7 @@ void MKLDNNGatherNode::execute(mkldnn::stream strm) { } } -void MKLDNNGatherNode::executeDynamicImpl(mkldnn::stream strm) { +void Gather::executeDynamicImpl(mkldnn::stream strm) { if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) { const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr(); const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr(); @@ -372,7 +375,7 @@ void MKLDNNGatherNode::executeDynamicImpl(mkldnn::stream strm) { } } -void MKLDNNGatherNode::initShortParams(threadExecParams& p, const uint64_t start) { +void Gather::initShortParams(threadExecParams& p, const uint64_t start) { if (!jitKernel) THROW_ERROR << "has uninitialized kernel in function initShortParams."; const uint64_t idxElPerVec = jitKernel->getIdxElPerVec(); @@ -439,7 +442,7 @@ void MKLDNNGatherNode::initShortParams(threadExecParams& p, const uint64_t start } } -void MKLDNNGatherNode::execReference() { +void Gather::execReference() { const int32_t* srcIndices = reinterpret_cast(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr()); const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr()); uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -471,12 +474,14 @@ void MKLDNNGatherNode::execReference() { }); } -std::vector MKLDNNGatherNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(1, 2, 3)); +std::vector Gather::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(1, 2, 3)); } -bool MKLDNNGatherNode::created() const { - return getType() == Gather; +bool Gather::created() const { + return getType() == Type::Gather; } -REG_MKLDNN_PRIM_FOR(MKLDNNGatherNode, Gather) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h index 90460c2978b..a34c443494b 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.h +++ b/src/plugins/intel_cpu/src/nodes/gather.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNGatherNode : public MKLDNNNode { +class Gather : public Node { public: - MKLDNNGatherNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Gather(const std::shared_ptr& op, const mkldnn::engine& eng, 
WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -87,5 +88,6 @@ private: std::shared_ptr jitKernel; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp index 760827b0661..f9050f87fb3 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp @@ -12,10 +12,13 @@ #include #include "common/cpu_memcpy.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNGatherElementsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool GatherElements::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v6::GatherElements::get_type_info_static())) { @@ -29,8 +32,8 @@ bool MKLDNNGatherElementsNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +GatherElements::GatherElements(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -54,7 +57,7 @@ MKLDNNGatherElementsNode::MKLDNNGatherElementsNode(const std::shared_ptrgetMemory().getStaticDims(); const auto& dstDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); strideAxDst_ = 1; @@ -69,20 +72,20 @@ void MKLDNNGatherElementsNode::prepareParams() { } } -void MKLDNNGatherElementsNode::initSupportedPrimitiveDescriptors() { +void GatherElements::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; Precision inDataPrecision = getOriginalInputPrecisionAtPort(dataIndex_); - if (!ov::intel_cpu::one_of(inDataPrecision.size(), - sizeof(PrecisionTrait::value_type), - sizeof(PrecisionTrait::value_type), - sizeof(PrecisionTrait::value_type))) { + if (!one_of(inDataPrecision.size(), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type))) { IE_THROW() << errorPrefix_ << " has unsupported 'inputData' input precision: " << inDataPrecision; } Precision indicesPrecision = getOriginalInputPrecisionAtPort(indicesIndex_); - if (!ov::intel_cpu::one_of(indicesPrecision, Precision::I32, Precision::I64)) { + if (!one_of(indicesPrecision, Precision::I32, Precision::I64)) { IE_THROW() << errorPrefix_ << " has unsupported 'indices' input precision: " << indicesPrecision; } @@ -94,12 +97,12 @@ void MKLDNNGatherElementsNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNGatherElementsNode::executeDynamicImpl(mkldnn::stream strm) { +void GatherElements::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } template -void MKLDNNGatherElementsNode::directExecution() { +void GatherElements::directExecution() { const auto *srcData = reinterpret_cast(getParentEdgeAt(dataIndex_)->getMemoryPtr()->GetPtr()); const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -131,7 +134,7 @@ void MKLDNNGatherElementsNode::directExecution() { parallel_nt(0, threadBody); } -void 
MKLDNNGatherElementsNode::execute(mkldnn::stream strm) { +void GatherElements::execute(mkldnn::stream strm) { switch (dataTypeSize_) { case sizeof(PrecisionTrait::value_type): return directExecution::value_type>(); @@ -144,8 +147,10 @@ void MKLDNNGatherElementsNode::execute(mkldnn::stream strm) { } } -bool MKLDNNGatherElementsNode::created() const { - return getType() == GatherElements; +bool GatherElements::created() const { + return getType() == Type::GatherElements; } -REG_MKLDNN_PRIM_FOR(MKLDNNGatherElementsNode, GatherElements) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.h b/src/plugins/intel_cpu/src/nodes/gather_elements.h index 5c586ec6b8d..d6dfc54f16c 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_elements.h +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNGatherElementsNode : public MKLDNNNode { +class GatherElements : public Node { public: - MKLDNNGatherElementsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + GatherElements(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -43,5 +44,6 @@ private: void directExecution(); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index d0baee74151..5a074562b66 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -13,14 +13,17 @@ #include #include "common/cpu_memcpy.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; #define THROW_ERROR IE_THROW() << "GatherND layer with name '" << getName() << "' " -bool MKLDNNGatherNDNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool GatherND::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (!ov::intel_cpu::one_of(op->get_type_info(), ngraph::op::v5::GatherND::get_type_info_static(), ngraph::op::v8::GatherND::get_type_info_static())) { + if (!one_of(op->get_type_info(), ngraph::op::v5::GatherND::get_type_info_static(), ngraph::op::v8::GatherND::get_type_info_static())) { errorMessage = "Node is not an instance of the GatherND operation from operation set v5 and v8."; return false; } @@ -31,8 +34,8 @@ bool MKLDNNGatherNDNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +GatherND::GatherND(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -55,22 +58,22 @@ MKLDNNGatherNDNode::MKLDNNGatherNDNode(const std::shared_ptr& op, THROW_ERROR << "has invalid batch_dims attribute: " << attrs.batchDims; } -void MKLDNNGatherNDNode::initSupportedPrimitiveDescriptors() { +void GatherND::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; Precision inDataPrecision = getOriginalInputPrecisionAtPort(GATHERND_DATA); - if (!ov::intel_cpu::one_of(inDataPrecision.size(), - sizeof(PrecisionTrait::value_type), - 
sizeof(PrecisionTrait::value_type), - sizeof(PrecisionTrait::value_type))) { + if (!one_of(inDataPrecision.size(), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type), + sizeof(PrecisionTrait::value_type))) { THROW_ERROR << "has unsupported 'data' input precision: " << inDataPrecision; } attrs.dataSize = inDataPrecision.size(); Precision indicesPrecision = getOriginalInputPrecisionAtPort(GATHERND_INDEXES); - if (!ov::intel_cpu::one_of(indicesPrecision, - Precision::I32, Precision::I64, Precision::I16, Precision::U16, Precision::I8, Precision::U8)) { + if (!one_of(indicesPrecision, + Precision::I32, Precision::I64, Precision::I16, Precision::U16, Precision::I8, Precision::U8)) { THROW_ERROR << "has unsupported 'indices' input precision: " << indicesPrecision; } @@ -80,7 +83,7 @@ void MKLDNNGatherNDNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNGatherNDNode::prepareParams() { +void GatherND::prepareParams() { auto& srcMemPtr = getParentEdgeAt(GATHERND_DATA)->getMemoryPtr(); auto& idxMemPtr = getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr(); auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); @@ -100,7 +103,7 @@ void MKLDNNGatherNDNode::prepareParams() { execPtr = std::make_shared(attrs); } -MKLDNNGatherNDNode::GatherNDExecutor::GatherNDExecutor(const GatherNDAttributes& attrs) : dataSize(attrs.dataSize), sliceRank(attrs.sliceRank) { +GatherND::GatherNDExecutor::GatherNDExecutor(const GatherNDAttributes& attrs) : dataSize(attrs.dataSize), sliceRank(attrs.sliceRank) { batchSize = std::accumulate(attrs.srcDims.begin(), attrs.srcDims.begin() + attrs.batchDims, 1lu, std::multiplies()); dataLength = std::accumulate(attrs.srcDims.begin() + sliceRank + attrs.batchDims, attrs.srcDims.end(), 1lu, std::multiplies()); @@ -124,7 +127,7 @@ MKLDNNGatherNDNode::GatherNDExecutor::GatherNDExecutor(const GatherNDAttributes& } } -void MKLDNNGatherNDNode::execute(mkldnn::stream strm) { +void GatherND::execute(mkldnn::stream strm) { if (!execPtr) THROW_ERROR << "has not compiled executor."; @@ -133,7 +136,7 @@ void MKLDNNGatherNDNode::execute(mkldnn::stream strm) { getChildEdgeAt(0)->getMemoryPtr()); } -void MKLDNNGatherNDNode::GatherNDExecutor::exec(const MKLDNNMemoryPtr& srcMemPtr, const MKLDNNMemoryPtr& idxMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { if (dataLength > 1) { gatherBlocks(srcMemPtr, idxMemPtr, dstMemPtr); return; @@ -146,7 +149,7 @@ void MKLDNNGatherNDNode::GatherNDExecutor::exec(const MKLDNNMemoryPtr& srcMemPtr OV_CASE(sizeof(PrecisionTrait::value_type), PrecisionTrait::value_type)); } -void MKLDNNGatherNDNode::GatherNDExecutor::gatherBlocks(const MKLDNNMemoryPtr& srcMemPtr, const MKLDNNMemoryPtr& idxMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -183,7 +186,7 @@ void MKLDNNGatherNDNode::GatherNDExecutor::gatherBlocks(const MKLDNNMemoryPtr& s } template -void MKLDNNGatherNDNode::GatherNDExecutor::gatherElementwise(const MKLDNNMemoryPtr& srcMemPtr, const MKLDNNMemoryPtr& idxMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, 
MemoryPtr& dstMemPtr) { const dataType* srcData = reinterpret_cast(srcMemPtr->GetPtr()); const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); dataType* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -219,12 +222,14 @@ void MKLDNNGatherNDNode::GatherNDExecutor::gatherElementwise(const MKLDNNMemoryP }); } -void MKLDNNGatherNDNode::executeDynamicImpl(mkldnn::stream strm) { +void GatherND::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNGatherNDNode::created() const { - return getType() == GatherND; +bool GatherND::created() const { + return getType() == Type::GatherND; } -REG_MKLDNN_PRIM_FOR(MKLDNNGatherNDNode, GatherND) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.h b/src/plugins/intel_cpu/src/nodes/gather_nd.h index d5cf3639f3e..413821e3f9e 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.h +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNGatherNDNode : public MKLDNNNode { +class GatherND : public Node { public: - MKLDNNGatherNDNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + GatherND(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -42,12 +43,12 @@ private: struct GatherNDExecutor { GatherNDExecutor(const GatherNDAttributes& attrs); ~GatherNDExecutor() = default; - void exec(const MKLDNNMemoryPtr& srcMemPtr, const MKLDNNMemoryPtr& idxMemPtr, MKLDNNMemoryPtr& dstMemPtr); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); private: template - void gatherElementwise(const MKLDNNMemoryPtr& srcMemPtr, const MKLDNNMemoryPtr& idxMemPtr, MKLDNNMemoryPtr& dstMemPtr); - void gatherBlocks(const MKLDNNMemoryPtr& srcMemPtr, const MKLDNNMemoryPtr& idxMemPtr, MKLDNNMemoryPtr& dstMemPtr); + void gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); + void gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); size_t batchSize = 1lu; size_t cycles = 1lu; @@ -63,9 +64,9 @@ private: struct GatherNDContext { GatherNDExecutor* executor; - const MKLDNNMemoryPtr srcMemPtr; - const MKLDNNMemoryPtr idxMemPtr; - MKLDNNMemoryPtr dstMemPtr; + const MemoryPtr srcMemPtr; + const MemoryPtr idxMemPtr; + MemoryPtr dstMemPtr; }; template @@ -83,5 +84,6 @@ private: executorPtr execPtr = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index d1c9c465933..ed14077c2dc 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -11,10 +11,13 @@ #include "gather_tree.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNGatherTreeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool GatherTree::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -31,8 +34,8 @@ bool MKLDNNGatherTreeNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - 
MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +GatherTree::GatherTree(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -54,12 +57,12 @@ MKLDNNGatherTreeNode::MKLDNNGatherTreeNode(const std::shared_ptr& IE_THROW() << errorPrefix << " end_token should be 1 dimension"; } -void MKLDNNGatherTreeNode::initSupportedPrimitiveDescriptors() { +void GatherTree::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; precision = getOriginalInputPrecisionAtPort(GATHER_TREE_STEP_IDX); - if (!ov::intel_cpu::one_of(precision, Precision::FP32, Precision::I32)) + if (!one_of(precision, Precision::FP32, Precision::I32)) precision = Precision::FP32; if (getOriginalInputPrecisionAtPort(GATHER_TREE_PARENT_IDX) != precision || @@ -77,7 +80,7 @@ void MKLDNNGatherTreeNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNGatherTreeNode::execute(mkldnn::stream strm) { +void GatherTree::execute(mkldnn::stream strm) { if (precision == Precision::FP32) return gatherTreeKernel(); else @@ -85,7 +88,7 @@ void MKLDNNGatherTreeNode::execute(mkldnn::stream strm) { } template -void MKLDNNGatherTreeNode::gatherTreeKernel() { +void GatherTree::gatherTreeKernel() { const auto *step_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr()); const auto * const parent_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr()); const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetShape().getElementsCount() @@ -145,8 +148,10 @@ void MKLDNNGatherTreeNode::gatherTreeKernel() { } } -bool MKLDNNGatherTreeNode::created() const { - return getType() == GatherTree; +bool GatherTree::created() const { + return getType() == Type::GatherTree; } -REG_MKLDNN_PRIM_FOR(MKLDNNGatherTreeNode, GatherTree) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.h b/src/plugins/intel_cpu/src/nodes/gather_tree.h index 68848cdcf8c..c5a3485c346 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.h +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNGatherTreeNode : public MKLDNNNode { +class GatherTree : public Node { public: - MKLDNNGatherTreeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + GatherTree(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -36,5 +37,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/generic.cpp b/src/plugins/intel_cpu/src/nodes/generic.cpp index 7d3c00056cb..3aa862e1586 100644 --- a/src/plugins/intel_cpu/src/nodes/generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/generic.cpp @@ -3,7 +3,7 @@ // #include -#include +#include #include "generic.h" #include #include @@ -12,19 +12,22 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; -using namespace ov::intel_cpu; -MKLDNNGenericNode::MKLDNNGenericNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache), ngraphOp(op) 
{ +namespace ov { +namespace intel_cpu { +namespace node { + +Generic::Generic(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache), ngraphOp(op) { } -void MKLDNNGenericNode::getSupportedDescriptors() { - if (!extFactory && impls.empty()) { +void Generic::getSupportedDescriptors() { + if (impls.empty()) { IE_THROW() << "Cannot get generic primitive for layer: " << getName() << " with type: " << getTypeStr(); } } -NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig) { +NodeConfig Generic::convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig) { NodeConfig config; config.dynBatchSupport = layerConfig.dynBatchSupport; config.inConfs.resize(layerConfig.inConfs.size()); @@ -42,7 +45,7 @@ NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::La return config; } -InferenceEngine::LayerConfig MKLDNNGenericNode::convertNodeToLayerConfig(const NodeConfig &nodeConfig) { +InferenceEngine::LayerConfig Generic::convertNodeToLayerConfig(const NodeConfig &nodeConfig) { InferenceEngine::LayerConfig config; config.dynBatchSupport = nodeConfig.dynBatchSupport; config.inConfs.resize(nodeConfig.inConfs.size()); @@ -60,7 +63,7 @@ InferenceEngine::LayerConfig MKLDNNGenericNode::convertNodeToLayerConfig(const N return config; } -void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { +void Generic::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -81,15 +84,10 @@ void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNGenericNode::createPrimitive() { - if (extFactory || !impls.empty()) { - return; - } - if (getSelectedPrimitiveDescriptor() == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; +void Generic::createPrimitive() { } -void MKLDNNGenericNode::execute(mkldnn::stream strm) { +void Generic::execute(mkldnn::stream strm) { if (!impls.empty()) { execLayer(); } else { @@ -97,11 +95,11 @@ void MKLDNNGenericNode::execute(mkldnn::stream strm) { } } -bool MKLDNNGenericNode::created() const { - return Generic == getType(); +bool Generic::created() const { + return Type::Generic == getType(); } -bool MKLDNNGenericNode::created(const MKLDNNExtensionManager::Ptr &extMgr) { +bool Generic::created(const ExtensionManager::Ptr &extMgr) { if (ngraphOp && extMgr) { // We should save extension manager in order to avoid situation when // it will destroyed before extensibility primitives @@ -109,39 +107,19 @@ bool MKLDNNGenericNode::created(const MKLDNNExtensionManager::Ptr &extMgr) { if (auto execImpl = std::dynamic_pointer_cast(impl)) impls.emplace_back(execImpl); - if (impls.empty()) { - extFactory = extMgr->CreateExtensionFactory(ngraphOp); + if (impls.empty()) + return false; - if (!extFactory) - return false; - - std::vector impls_no_exec; - InferenceEngine::ResponseDesc resp; - InferenceEngine::StatusCode rc = extFactory->getImplementations(impls_no_exec, &resp); - if (rc == InferenceEngine::NOT_IMPLEMENTED) { - return false; - } else if (rc != InferenceEngine::OK) { - IE_THROW() << resp.msg; - } - - for (const auto& impl : impls_no_exec) { - if (auto exec_impl = std::dynamic_pointer_cast(impl)) { - impls.emplace_back(exec_impl); - } - } - } - - setType(Generic); + setType(Type::Generic); } return created(); } -void MKLDNNGenericNode::cleanup() { - MKLDNNNode::cleanup(); - extFactory.reset(); +void Generic::cleanup() { + Node::cleanup(); } -void MKLDNNGenericNode::execLayer() { 
+void Generic::execLayer() { bool isDynBatch = dynBatchLim > 0; std::vector inputs; std::vector constInputs; @@ -175,7 +153,7 @@ void MKLDNNGenericNode::execLayer() { } } -void MKLDNNGenericNode::initDescriptor(const NodeConfig &config) { +void Generic::initDescriptor(const NodeConfig &config) { NodeConfig rightConfig = config; InferenceEngine::StatusCode rc; InferenceEngine::ResponseDesc resp; @@ -196,7 +174,7 @@ void MKLDNNGenericNode::initDescriptor(const NodeConfig &config) { for (size_t j = 0; j < rightConfig.inConfs.size(); j++) { // TODO: we need to better recognize cases with possible inplace conficts - if (getParentEdgeAt(j)->getParent()->getType() != Split && + if (getParentEdgeAt(j)->getParent()->getType() != Type::Split && getParentEdgeAt(j)->getParent()->getChildEdges().size() > 1) { rightConfig.inConfs[j].inPlace(-1); } @@ -233,4 +211,6 @@ void MKLDNNGenericNode::initDescriptor(const NodeConfig &config) { } } -REG_MKLDNN_PRIM_FOR(MKLDNNGenericNode, Generic); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/generic.h b/src/plugins/intel_cpu/src/nodes/generic.h index 10fb9e600d1..e8a5893a53b 100644 --- a/src/plugins/intel_cpu/src/nodes/generic.h +++ b/src/plugins/intel_cpu/src/nodes/generic.h @@ -14,18 +14,19 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNGenericNode : public MKLDNNNode { +class Generic : public Node { public: - MKLDNNGenericNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNGenericNode() = default; + Generic(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~Generic() = default; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override; void execute(mkldnn::stream strm) override; bool created() const override; - bool created(const MKLDNNExtensionManager::Ptr& extMgr) override; + bool created(const ExtensionManager::Ptr& extMgr) override; bool canBeInPlace() const override { return false; } @@ -39,11 +40,11 @@ protected: NodeConfig convertLayerToNodeConfig(const InferenceEngine::LayerConfig &layerConfig); InferenceEngine::LayerConfig convertNodeToLayerConfig(const NodeConfig &nodeConfig); - InferenceEngine::ILayerImplFactory::Ptr extFactory; std::vector impls; const std::shared_ptr ngraphOp; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 3234b908d47..e03ece13d37 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -8,10 +8,13 @@ #include "ie_parallel.hpp" #include "grn.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNGRNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool GRN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -28,8 +31,8 @@ bool MKLDNNGRNNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +GRN::GRN(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { 
IE_THROW(NotImplemented) << errorMessage; @@ -47,7 +50,7 @@ MKLDNNGRNNode::MKLDNNGRNNode(const std::shared_ptr& op, const mkld bias = grn->get_bias(); } -void MKLDNNGRNNode::initSupportedPrimitiveDescriptors() { +void GRN::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -56,7 +59,7 @@ void MKLDNNGRNNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNGRNNode::execute(mkldnn::stream strm) { +void GRN::execute(mkldnn::stream strm) { const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); @@ -79,8 +82,10 @@ void MKLDNNGRNNode::execute(mkldnn::stream strm) { }); } -bool MKLDNNGRNNode::created() const { - return getType() == GRN; +bool GRN::created() const { + return getType() == Type::GRN; } -REG_MKLDNN_PRIM_FOR(MKLDNNGRNNode, GRN) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/grn.h b/src/plugins/intel_cpu/src/nodes/grn.h index b0e7b36dc0a..751df3757ce 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.h +++ b/src/plugins/intel_cpu/src/nodes/grn.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNGRNNode : public MKLDNNNode { +class GRN : public Node { public: - MKLDNNGRNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + GRN(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -28,5 +29,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index f8371e2b0ac..ea5b428032c 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -4,7 +4,7 @@ #include "if.h" -#include +#include #include "ie_ngraph_utils.hpp" #include "transformations/utils/utils.hpp" #include "common/cpu_memcpy.h" @@ -12,16 +12,18 @@ #include #include -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { +namespace node { -MKLDNNIfNode::PortMapHelper::PortMapHelper(const MKLDNNMemoryPtr &from, const std::deque& to, +If::PortMapHelper::PortMapHelper(const MemoryPtr &from, const std::deque& to, const mkldnn::engine& eng) : srcMemPtr(from), dstMemPtrs(to) { size = 0; if (srcMemPtr->getDesc().isDefined()) size = srcMemPtr->GetSize(); } -void MKLDNNIfNode::PortMapHelper::execute(mkldnn::stream& strm) { +void If::PortMapHelper::execute(mkldnn::stream& strm) { // if output shapes are changed, // after subgraph inference we should redefine out memory of 'If' redefineTo(); @@ -29,7 +31,7 @@ void MKLDNNIfNode::PortMapHelper::execute(mkldnn::stream& strm) { cpu_memcpy(dstMemPtrs.front()->GetPtr(), srcMemPtr->GetPtr(), size); } -void MKLDNNIfNode::PortMapHelper::redefineTo() { +void If::PortMapHelper::redefineTo() { const auto &currDesc = dstMemPtrs.front()->getDesc(); if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != srcMemPtr->getStaticDims()) { // TODO : check the entire dstMemPtrs usage considering the proper memory sharing @@ -42,7 +44,7 @@ void MKLDNNIfNode::PortMapHelper::redefineTo() { } } -bool MKLDNNIfNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool If::isSupportedOperation(const std::shared_ptr& 
op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v8::If::get_type_info_static())) { errorMessage = "Not supported If operation version " + std::to_string(op->get_type_info().version) + @@ -55,15 +57,15 @@ bool MKLDNNIfNode::isSupportedOperation(const std::shared_ptr& o return true; } -MKLDNNIfNode::MKLDNNIfNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache), ovOp(op) { +If::If(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache), ovOp(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } } -void MKLDNNIfNode::getSupportedDescriptors() { +void If::getSupportedDescriptors() { auto ifOp = ov::as_type_ptr(ovOp); const std::shared_ptr& thenBody = ifOp->get_then_body(); @@ -145,7 +147,7 @@ void MKLDNNIfNode::getSupportedDescriptors() { } } -void MKLDNNIfNode::initSupportedPrimitiveDescriptors() { +void If::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -172,7 +174,7 @@ void MKLDNNIfNode::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } -void MKLDNNIfNode::createPrimitive() { +void If::createPrimitive() { const auto& eng = getEngine(); prepareBeforeMappers(true, eng); prepareBeforeMappers(false, eng); @@ -184,7 +186,7 @@ void MKLDNNIfNode::createPrimitive() { } } -void MKLDNNIfNode::prepareBeforeMappers(const bool isThen, const dnnl::engine& eng) { +void If::prepareBeforeMappers(const bool isThen, const dnnl::engine& eng) { auto &inputPortMap = isThen ? thenInputPortMap : elseInputPortMap; auto &inputMems = isThen ? inputMemThen : inputMemElse; auto &beforeMappers = isThen ? beforeThenMappers : beforeElseMappers; @@ -196,7 +198,7 @@ void MKLDNNIfNode::prepareBeforeMappers(const bool isThen, const dnnl::engine& e } } -void MKLDNNIfNode::prepareAfterMappers(const bool isThen, const dnnl::engine& eng) { +void If::prepareAfterMappers(const bool isThen, const dnnl::engine& eng) { auto &outputPortMap = isThen ? thenOutputPortMap : elseOutputPortMap; auto &outputMems = isThen ? outputMemThen : outputMemElse; auto &afterMappers = isThen ? afterThenMappers : afterElseMappers; @@ -208,14 +210,14 @@ void MKLDNNIfNode::prepareAfterMappers(const bool isThen, const dnnl::engine& en } } -std::deque MKLDNNIfNode::getToMemories(const MKLDNNNode* node, const size_t port) const { - std::deque memories; +std::deque If::getToMemories(const Node* node, const size_t port) const { + std::deque memories; for (auto edge : node->getChildEdgesAtPort(port)) memories.push_back(edge->getMemoryPtr()); return memories; } -void MKLDNNIfNode::execute(mkldnn::stream strm) { +void If::execute(mkldnn::stream strm) { const bool condition = static_cast((reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()))[0]); auto& beforeMappers = condition ? 
beforeThenMappers : beforeElseMappers; @@ -230,12 +232,14 @@ void MKLDNNIfNode::execute(mkldnn::stream strm) { mapper->execute(strm); } -void MKLDNNIfNode::executeDynamicImpl(mkldnn::stream strm) { +void If::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNIfNode::created() const { - return getType() == If; +bool If::created() const { + return getType() == Type::If; } -REG_MKLDNN_PRIM_FOR(MKLDNNIfNode, If); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/if.h b/src/plugins/intel_cpu/src/nodes/if.h index 3edc0baf8b1..0d595b231e8 100644 --- a/src/plugins/intel_cpu/src/nodes/if.h +++ b/src/plugins/intel_cpu/src/nodes/if.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNIfNode : public MKLDNNNode { +class If : public Node { public: - MKLDNNIfNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + If(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void initSupportedPrimitiveDescriptors() override; @@ -26,7 +27,7 @@ public: void execute(mkldnn::stream strm) override; bool isExecutable() const override { return true; } - void inline setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } + void inline setExtManager(const ExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } protected: void executeDynamicImpl(mkldnn::stream strm) override; @@ -37,7 +38,7 @@ private: void prepareBeforeMappers(const bool isThen, const dnnl::engine& eng); void prepareAfterMappers(const bool isThen, const dnnl::engine& eng); - std::deque getToMemories(const MKLDNNNode* node, const size_t port) const; + std::deque getToMemories(const Node* node, const size_t port) const; struct PortMap { int from; /**< Index of external/internal out data */ @@ -46,24 +47,24 @@ private: class PortMapHelper { public: - PortMapHelper(const MKLDNNMemoryPtr& from, const std::deque& to, const mkldnn::engine& eng); + PortMapHelper(const MemoryPtr& from, const std::deque& to, const mkldnn::engine& eng); ~PortMapHelper() = default; void execute(mkldnn::stream& strm); private: void redefineTo(); - MKLDNNMemoryPtr srcMemPtr; - std::deque dstMemPtrs; + MemoryPtr srcMemPtr; + std::deque dstMemPtrs; ptrdiff_t size; }; - MKLDNNExtensionManager::Ptr ext_mng; - MKLDNNGraph subGraphThen; - MKLDNNGraph subGraphElse; - std::vector> inputMemThen, inputMemElse; - std::deque outputMemThen, outputMemElse; + ExtensionManager::Ptr ext_mng; + Graph subGraphThen; + Graph subGraphElse; + std::vector> inputMemThen, inputMemElse; + std::deque outputMemThen, outputMemElse; std::vector> beforeThenMappers, @@ -80,5 +81,6 @@ private: const std::shared_ptr ovOp; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 83f2d098ef2..759ad02f600 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -4,7 +4,7 @@ #include "input.h" #include "common/cpu_memcpy.h" -#include +#include #include #include @@ -23,13 +23,15 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace details; using namespace ngraph::op; using namespace dnnl::impl::cpu::x64; using namespace Xbyak; +namespace ov { +namespace 
intel_cpu { +namespace node { namespace { struct jit_has_subnormals_base : public jit_generator { @@ -227,8 +229,8 @@ jit_has_subnormals_base::fn_t jit_has_subnormals_function() { } // namespace -MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Input::Input(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { if (!one_of(op->get_type_info(), v0::Parameter::get_type_info_static(), v0::Constant::get_type_info_static(), @@ -246,17 +248,17 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const } } -void MKLDNNInputNode::cloneBlobIfRequired() { +void Input::cloneBlobIfRequired() { Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); const size_t size = shape.getElementsCount(); DnnlBlockedMemoryDesc memDesc(prec, shape); auto cloneBlob = [&, this] () { - MKLDNNMemory memory{ getEngine() }; + Memory memory{ getEngine() }; // CVS-74980 - // MKLDNN/oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) + // oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) // but ngraph Constant uses actual bitWidth for data storage allocation // in that case we make a copy to avoid overflow if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { @@ -266,7 +268,7 @@ void MKLDNNInputNode::cloneBlobIfRequired() { memcpy(memory.GetPtr(), constOp->get_data_ptr(), constOp->get_byte_size()); } - MKLDNNMemoryPtr ptr = MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())); + MemoryPtr ptr = MemoryPtr(new Memory(getEngine())); ptr->Create(memDesc); ptr->SetData(memory); @@ -349,51 +351,51 @@ void MKLDNNInputNode::cloneBlobIfRequired() { }; if (weightCache) { - MKLDNNMemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob); - memoryPtr = std::const_pointer_cast(ptr); + MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob); + memoryPtr = std::const_pointer_cast(ptr); } else if (isBlobAligned() && !hasSubnormals() && !isWA()) { - auto ptr = new MKLDNNMemory(getEngine()); + auto ptr = new Memory(getEngine()); ptr->Create(memDesc, constOp->get_data_ptr()); - memoryPtr = MKLDNNMemoryCPtr(ptr); + memoryPtr = MemoryCPtr(ptr); } else { - memoryPtr = std::const_pointer_cast(cloneBlob()); + memoryPtr = std::const_pointer_cast(cloneBlob()); } } -MKLDNNInputNode::MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, - const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(type, name, eng, cache) { +Input::Input(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, + const std::string &type, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(type, name, eng, cache) { constant = ConstantType::NoConst; - if (getType() == Input) { + if (getType() == Type::Input) { outputShapes.emplace_back(shape); addOriginalOutputPrecision(prc); - } else if (getType() == Output) { + } else if (getType() == Type::Output) { inputShapes.emplace_back(shape); addOriginalInputPrecision(prc); } } -MKLDNNInputNode::MKLDNNInputNode(MemoryDescPtr memDesc, const std::string &name, const std::string &type, - const mkldnn::engine &eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNInputNode(memDesc->getShape(), memDesc->getPrecision(), name, type, eng, cache) { +Input::Input(MemoryDescPtr memDesc, const 
std::string &name, const std::string &type, + const mkldnn::engine &eng, WeightsSharing::Ptr &cache) : + Input(memDesc->getShape(), memDesc->getPrecision(), name, type, eng, cache) { extMemDesc = memDesc; } -void MKLDNNInputNode::withMeanImage() { +void Input::withMeanImage() { isMeanImage = true; } -MKLDNNMemoryCPtr MKLDNNInputNode::getMemoryPtr() const { +MemoryCPtr Input::getMemoryPtr() const { return memoryPtr; } -void MKLDNNInputNode::getSupportedDescriptors() { - if (getType() == Input) { +void Input::getSupportedDescriptors() { + if (getType() == Type::Input) { if (!getParentEdges().empty()) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - } else if (getType() == Output) { + } else if (getType() == Type::Output) { if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (!getChildEdges().empty()) @@ -401,7 +403,7 @@ void MKLDNNInputNode::getSupportedDescriptors() { } } -void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { +void Input::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -412,7 +414,7 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNInputNode::createPrimitive() { +void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -431,15 +433,15 @@ void MKLDNNInputNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; } -bool MKLDNNInputNode::created() const { - return getType() == Input || getType() == Output; +bool Input::created() const { + return getType() == Type::Input || getType() == Type::Output; } -void MKLDNNInputNode::initSupportedPdDefault() { +void Input::initSupportedPdDefault() { std::vector inPortConfs; std::vector outPortConfs; - if (getType() == Input || getType() == MemoryInput) { + if (getType() == Type::Input || getType() == Type::MemoryInput) { auto precision = getOriginalOutputPrecisionAtPort(0); if (precision == Precision::U16 || isMeanImage) { precision = Precision::FP32; @@ -449,7 +451,7 @@ void MKLDNNInputNode::initSupportedPdDefault() { if (!getParentEdges().empty()) { inPortConfs.push_back({LayoutType::ncsp, precision, true}); } - } else if (getType() == Output) { + } else if (getType() == Type::Output) { auto precision = getOriginalInputPrecisionAtPort(0); if (precision == Precision::U16) precision = Precision::FP32; @@ -461,19 +463,20 @@ void MKLDNNInputNode::initSupportedPdDefault() { impl_desc_type::unknown); } -void MKLDNNInputNode::initSupportedPdFromMemDesc() { +void Input::initSupportedPdFromMemDesc() { NodeConfig config; PortConfig portConfig; portConfig.inPlace(-1); portConfig.constant(false); portConfig.setMemDesc(extMemDesc); - if (getType() == Input || getType() == MemoryInput) { + if (getType() == Type::Input || getType() == Type::MemoryInput) { config.outConfs.push_back(portConfig); - } else if (getType() == Output) { + } else if (getType() == Type::Output) { config.inConfs.push_back(portConfig); } supportedPrimitiveDescriptors.emplace_back(std::move(config), impl_desc_type::unknown); } -REG_MKLDNN_PRIM_FOR(MKLDNNInputNode, Input); -REG_MKLDNN_PRIM_FOR(MKLDNNInputNode, Output); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git 
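// A minimal standalone sketch of the weightCache->findOrCreate(blobKey(), cloneBlob)
// idiom used in Input::cloneBlobIfRequired above: a string key identifies a constant
// blob, the builder runs only on the first request, and every later request shares the
// already materialized buffer. SharedBlobCache and Blob are placeholder names for this
// sketch, not the plugin's WeightsSharing/Memory types.
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

using Blob = std::vector<float>;

class SharedBlobCache {
public:
    std::shared_ptr<Blob> findOrCreate(const std::string& key,
                                       const std::function<std::shared_ptr<Blob>()>& build) {
        std::lock_guard<std::mutex> lock(mutex_);
        auto it = cache_.find(key);
        if (it == cache_.end())
            it = cache_.emplace(key, build()).first;  // build exactly once per key
        return it->second;                            // later callers share the same blob
    }

private:
    std::mutex mutex_;
    std::map<std::string, std::shared_ptr<Blob>> cache_;
};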
a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index 770f1b583f0..88c910d4c78 100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -11,14 +11,15 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNInputNode : public MKLDNNNode { +class Input : public Node { public: - MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNInputNode(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, - const std::string &type, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNInputNode(MemoryDescPtr memDesc, const std::string &name, const std::string &type, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache); + Input(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + Input(const Shape& shape, const InferenceEngine::Precision &prc, const std::string &name, + const std::string &type, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + Input(MemoryDescPtr memDesc, const std::string &name, const std::string &type, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -26,7 +27,7 @@ public: bool created() const override; void withMeanImage(); - MKLDNNMemoryCPtr getMemoryPtr() const; + MemoryCPtr getMemoryPtr() const; void executeDynamicImpl(mkldnn::stream strm) override {} bool isExecutable() const override { @@ -43,10 +44,11 @@ private: private: std::shared_ptr constOp; - MKLDNNMemoryCPtr memoryPtr; + MemoryCPtr memoryPtr; MemoryDescPtr extMemDesc = nullptr; bool isMeanImage = false; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index dddd1910780..23d1c15c551 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "ie_parallel.hpp" #include @@ -31,7 +31,6 @@ #include "utils/cpu_utils.hpp" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu; @@ -42,6 +41,10 @@ using namespace Xbyak; #define GET_OFF(field) offsetof(jit_interpolate_call_args, field) +namespace ov { +namespace intel_cpu { +namespace node { + template struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_interpolate_kernel_f32) @@ -1614,7 +1617,7 @@ private: namespace { struct InterpolateKey { - MKLDNNInterpolateNode::InterpolateAttrs nodeAttrs; + Interpolate::InterpolateAttrs nodeAttrs; VectorDims srcDims; VectorDims dstDims; std::vector dataScales; @@ -1730,7 +1733,7 @@ using ngInterpCoordTransf = ngraph::opset4::Interpolate::CoordinateTransformMode using ngInterpNearMode = ngraph::opset4::Interpolate::NearestMode; using ngInterpShapeCalcMode = ngraph::opset4::Interpolate::ShapeCalcMode; -bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Interpolate::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto interp = std::dynamic_pointer_cast(op); if (!interp) { @@ -1793,8 +1796,8 @@ bool MKLDNNInterpolateNode::isSupportedOperation(const 
std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Interpolate::Interpolate(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Interpolate node with name '" + getName() + "'"; @@ -1907,7 +1910,7 @@ MKLDNNInterpolateNode::MKLDNNInterpolateNode(const std::shared_ptr } } -void MKLDNNInterpolateNode::getSupportedDescriptors() { +void Interpolate::getSupportedDescriptors() { if (getParentEdges().size() != 3 && getParentEdges().size() != 4) // data, target_shape, scale, axis(optional). IE_THROW() << errorPrefix << " has incorrect number of input edges"; @@ -1951,7 +1954,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { } } -void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { +void Interpolate::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -2028,8 +2031,8 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNInterpolateNode::needShapeInfer() const { - if (MKLDNNNode::inputShapesModified()) { +bool Interpolate::needShapeInfer() const { + if (Node::inputShapesModified()) { return true; } if (shapeCalcMode == InterpolateShapeCalcMode::scales) { @@ -2056,12 +2059,12 @@ bool MKLDNNInterpolateNode::needShapeInfer() const { return false; } -std::vector MKLDNNInterpolateNode::shapeInfer() const { +std::vector Interpolate::shapeInfer() const { const size_t port = shapeCalcMode == InterpolateShapeCalcMode::sizes ? TARGET_SHAPE_ID : SCALES_ID; return shapeInferGeneric(PortMask(port, AXES_ID)); } -void MKLDNNInterpolateNode::executeDynamicImpl(mkldnn::stream strm) { +void Interpolate::executeDynamicImpl(mkldnn::stream strm) { execute(strm); const size_t port = shapeCalcMode == InterpolateShapeCalcMode::sizes ? 
TARGET_SHAPE_ID : SCALES_ID; @@ -2075,11 +2078,11 @@ void MKLDNNInterpolateNode::executeDynamicImpl(mkldnn::stream strm) { } } -bool MKLDNNInterpolateNode::needPrepareParams() const { +bool Interpolate::needPrepareParams() const { return (inputShapesModified() || lastOutputDims != getChildEdgesAtPort(0)[0]->getMemory().getStaticDims()); } -void MKLDNNInterpolateNode::prepareParams() { +void Interpolate::prepareParams() { if (!shapesDefined()) { IE_THROW() << "Can't prepare params for Interpolate node with name: " << getName() << ", because input/output dims aren't defined"; } @@ -2149,7 +2152,7 @@ void MKLDNNInterpolateNode::prepareParams() { lastOutputDims = dstDims; } -void MKLDNNInterpolateNode::createPrimitive() { +void Interpolate::createPrimitive() { auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); auto& dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -2184,18 +2187,18 @@ static inline float triangleCoeff(float x) { return (std::max)(0.0f, 1 - std::abs(x)); } -void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) { +void Interpolate::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) { mkldnn::post_ops ops; postOpsDataPtrs.clear(); for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); + auto* fakeQuantizeNode = dynamic_cast(node.get()); if (fakeQuantizeNode) { fakeQuantizeNode->appendPostOps(ops, {}, postOpsDataPtrs); continue; } - auto* eltwiseNode = dynamic_cast(node.get()); + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { eltwiseNode->appendPostOps(ops, dims, postOpsDataPtrs); continue; @@ -2207,7 +2210,7 @@ void MKLDNNInterpolateNode::setPostOps(mkldnn::primitive_attr &attr, const Vecto attr.set_post_ops(ops); } -SizeVector MKLDNNInterpolateNode::getPaddedInputShape(const VectorDims &srcDims, +SizeVector Interpolate::getPaddedInputShape(const VectorDims &srcDims, const std::vector &padBegin, const std::vector &padEnd) { SizeVector paddedShape; @@ -2222,7 +2225,7 @@ SizeVector MKLDNNInterpolateNode::getPaddedInputShape(const VectorDims &srcDims, // if "scale" version: set scales with input scales, 1.f for other dims not in axis // if "size" version: scales = shape[target] / shape[input].pad, 1.f for other dims not in axis // scales is a required input, but should not use input scales when "size" case, which may added eps that lead to inaccurate result, recalculate scales instead. -std::vector MKLDNNInterpolateNode::getScales(const VectorDims &srcDimPad, const VectorDims &dstDim) { +std::vector Interpolate::getScales(const VectorDims &srcDimPad, const VectorDims &dstDim) { const size_t dataRank = getInputShapeAtPort(DATA_ID).getRank(); std::vector fullScales(dataRank, 1.f); const size_t axesRank = axes.size(); @@ -2234,7 +2237,7 @@ std::vector MKLDNNInterpolateNode::getScales(const VectorDims &srcDimPad, return fullScales; } -void MKLDNNInterpolateNode::execute(mkldnn::stream strm) { +void Interpolate::execute(mkldnn::stream strm) { if (!execPtr) { IE_THROW() << "Can't execute Interpolate node. 
Primitive didn't created"; } @@ -2322,7 +2325,7 @@ void MKLDNNInterpolateNode::execute(mkldnn::stream strm) { // for ndhwc and nCdhw8c[16c] // input may be f32/bf16/int8, fused->output varies -void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, +void Interpolate::InterpolateJitExecutor::NNCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { int *index_d = static_cast(&indexTable[0]); int *index_h = static_cast(&indexTable[OD]); @@ -2378,7 +2381,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNCGathered(const uint8_t *i } // batch end } -void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, +void Interpolate::InterpolateJitExecutor::NNPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { int *index_d = static_cast(&indexTable[0]); int *index_h = static_cast(&indexTable[OD]); @@ -2409,7 +2412,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::NNPlanar(const uint8_t *in_p }); } -void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, +void Interpolate::InterpolateJitExecutor::linearOnnxPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { // FrontTopLeft:0, FrontTopRight:1, FrontBottomLeft:2, FrontBottomRight:3, EndTopLeft:4, EndTopRight:5, EndBottomLeft:6, EndBottomRight:7 // weight: Left:0, ritht:1, top:2, bottom:3, front:4, end:5 @@ -2433,7 +2436,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxPlanar(const uint8 }); } -void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, +void Interpolate::InterpolateJitExecutor::linearOnnxCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { // left:OW right:OW Top:OH Bottom:OH Front:OD End:OD std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); @@ -2505,7 +2508,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::linearOnnxCGathered(const ui }); } -void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, +void Interpolate::InterpolateJitExecutor::cubicCGathered(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, int IH, int IW, int OH, int OW) { const int idxNum = 1; int *xOrigin = static_cast(&indexTable[0]); @@ -2552,7 +2555,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicCGathered(const uint8_t }); } -void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, +void Interpolate::InterpolateJitExecutor::cubicPlanar(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_, int B, int C, int IH, int IW, int OH, int OW) { int tblAdvance = 0; int *xOrigin = static_cast(&indexTable[tblAdvance]); @@ -2591,7 +2594,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::cubicPlanar(const uint8_t *i // ===================================================================================================================== // 
index layout: // d_0............d_OD-1, h_0..............h_OH-1, w_0................w_OW-1 -void MKLDNNInterpolateNode::InterpolateExecutor::buildTblNN(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, +void Interpolate::InterpolateExecutor::buildTblNN(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, InterpolateLayoutType layout, InterpolateNearestMode nearestMode) { const int dimSize = dataRank; float fz = (dimSize == 5) ? dataScales[dimSize - 3] : 1.f; @@ -2624,7 +2627,7 @@ void MKLDNNInterpolateNode::InterpolateExecutor::buildTblNN(const SizeVector& sr // scale is float(outShape) / float(inShape) // strictly consistent with onnx calc manner(div scale, not multiply inverse), given this is done offline // the slight precison diff can produce obvious wrong value due to "nearest round" behavior for NN mode -float MKLDNNInterpolateNode::InterpolateExecutor::coordTransToInput(int outCoord, float scale, int inShape, int outShape) const { +float Interpolate::InterpolateExecutor::coordTransToInput(int outCoord, float scale, int inShape, int outShape) const { if (scale == 1.0f || (inShape == outShape)) { return outCoord; } @@ -2662,7 +2665,7 @@ float MKLDNNInterpolateNode::InterpolateExecutor::coordTransToInput(int outCoord } } -int MKLDNNInterpolateNode::InterpolateExecutor::nearestRound(float originCoord, bool isDownsample, InterpolateNearestMode nearestMode) const { +int Interpolate::InterpolateExecutor::nearestRound(float originCoord, bool isDownsample, InterpolateNearestMode nearestMode) const { switch (nearestMode) { case InterpolateNearestMode::round_prefer_floor: { if (originCoord == (static_cast(originCoord) + 0.5f)) @@ -2696,7 +2699,7 @@ int MKLDNNInterpolateNode::InterpolateExecutor::nearestRound(float originCoord, } } -void MKLDNNInterpolateNode::InterpolateExecutor::linearOnnxCF(int outCoord, float scale, int inShape, int outShape, +void Interpolate::InterpolateExecutor::linearOnnxCF(int outCoord, float scale, int inShape, int outShape, int& index0, int& index1, float& weight0, float& weight1) { float inCoord = coordTransToInput(outCoord, scale, inShape, outShape); inCoord = std::max(0.0f, std::min(inCoord, static_cast(inShape - 1))); @@ -2711,7 +2714,7 @@ void MKLDNNInterpolateNode::InterpolateExecutor::linearOnnxCF(int outCoord, floa } } -void MKLDNNInterpolateNode::InterpolateExecutor::buildTblLinearOnnx(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, +void Interpolate::InterpolateExecutor::buildTblLinearOnnx(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, InterpolateLayoutType layout) { int dimSize = dataRank; float fz = (spatialDimSize > 2) ? dataScales[dimSize - 3] : 1.f; @@ -2824,7 +2827,7 @@ void MKLDNNInterpolateNode::InterpolateExecutor::buildTblLinearOnnx(const SizeVe // wd .........wd, wh............wh, ww.............ww, id...........id, ih............ih, iw..............iw // | | // wh0.....wh_diameter ih0.....ih_diameter -void MKLDNNInterpolateNode::InterpolateExecutor::buildTblLinear(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, +void Interpolate::InterpolateExecutor::buildTblLinear(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, int kernel_width, bool antialias) { int dimSize = dataRank; float fz = (dimSize == 5) ? 
dataScales[dimSize - 3] : 1.f; @@ -2901,7 +2904,7 @@ void MKLDNNInterpolateNode::InterpolateExecutor::buildTblLinear(const SizeVector } } -std::vector MKLDNNInterpolateNode::InterpolateExecutor::getCubicCoeffs(float mantissa, float a) { +std::vector Interpolate::InterpolateExecutor::getCubicCoeffs(float mantissa, float a) { float m = std::fabs(mantissa); std::vector coeffs(4, 0.f); @@ -2915,7 +2918,7 @@ std::vector MKLDNNInterpolateNode::InterpolateExecutor::getCubicCoeffs(fl // table layout: // OW OW OW OW OW OH OH OH OH OH // x_idx x_weight0 x_weight1 x_weight2 x_weight3 y_idx y_weight0 y_weight1 y_weight2 y_weight3 -void MKLDNNInterpolateNode::InterpolateExecutor::buildTblCubic(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, +void Interpolate::InterpolateExecutor::buildTblCubic(const SizeVector& srcDimPad5d, const SizeVector& dstDim5d, const std::vector& dataScales, float cubicCoeff, InterpolateLayoutType layout) { int dimSize = dataRank; float fy = dataScales[dimSize - 2]; @@ -2980,7 +2983,7 @@ void MKLDNNInterpolateNode::InterpolateExecutor::buildTblCubic(const SizeVector& } } -void MKLDNNInterpolateNode::InterpolateRefExecutor::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, +void Interpolate::InterpolateRefExecutor::NNRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { int *index_d = static_cast(&indexTable[0]); int *index_h = static_cast(&indexTable[OD]); @@ -3002,7 +3005,7 @@ void MKLDNNInterpolateNode::InterpolateRefExecutor::NNRef(const uint8_t *in_ptr_ }); } -void MKLDNNInterpolateNode::InterpolateRefExecutor::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, +void Interpolate::InterpolateRefExecutor::linearOnnxRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, int OD, int OH, int OW) { std::vector indexPtr(MAX_INPUT_INTERPOLATE, 0); std::vector weightPtr(MAX_INPUT_INTERPOLATE, 0); @@ -3100,7 +3103,7 @@ void MKLDNNInterpolateNode::InterpolateRefExecutor::linearOnnxRef(const uint8_t }); } -void MKLDNNInterpolateNode::InterpolateRefExecutor::cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { +void Interpolate::InterpolateRefExecutor::cubicRef(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int IH, int IW, int OH, int OW) { const int idxNum = 1; int *xOrigin = static_cast(&indexTable[0]); float *xFactor = reinterpret_cast(&indexTable[OW]); @@ -3132,7 +3135,7 @@ void MKLDNNInterpolateNode::InterpolateRefExecutor::cubicRef(const uint8_t *in_p }); } -float MKLDNNInterpolateNode::InterpolateRefExecutor::getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec) { +float Interpolate::InterpolateRefExecutor::getValue(const uint8_t *base, size_t offset, InferenceEngine::Precision prec) { const uint8_t *baseOffset = base + offset; switch (prec) { case Precision::U8: { @@ -3161,7 +3164,7 @@ float MKLDNNInterpolateNode::InterpolateRefExecutor::getValue(const uint8_t *bas } } -void MKLDNNInterpolateNode::InterpolateRefExecutor::setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec) { +void Interpolate::InterpolateRefExecutor::setValue(uint8_t *base, size_t offset, float value, InferenceEngine::Precision prec) { uint8_t *baseOffset = base + offset; switch (prec) { case Precision::U8: { @@ -3190,7 +3193,7 @@ void 
MKLDNNInterpolateNode::InterpolateRefExecutor::setValue(uint8_t *base, size } } -void MKLDNNInterpolateNode::InterpolateRefExecutor::linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, +void Interpolate::InterpolateRefExecutor::linearInterpolation(const uint8_t *in_ptr_, uint8_t *out_ptr_, int B, int C, int ID, int IH, int IW, float fx, float fy, float fz, int OD, int OH, int OW, int kernel_width, bool antialias) { if (IW == OW && IH == OH && ID == OD) { size_t spatialDimSize = IW * IH * ID; @@ -3307,7 +3310,7 @@ void MKLDNNInterpolateNode::InterpolateRefExecutor::linearInterpolation(const ui }); } -MKLDNNInterpolateNode::InterpolateExecutor::InterpolateExecutor(const InterpolateAttrs& interpAttrs, +Interpolate::InterpolateExecutor::InterpolateExecutor(const InterpolateAttrs& interpAttrs, const VectorDims &srcDims, const VectorDims &dstDims, const std::vector &dataScales) : @@ -3345,7 +3348,7 @@ MKLDNNInterpolateNode::InterpolateExecutor::InterpolateExecutor(const Interpolat } } -MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const InterpolateAttrs& interpAttrs, +Interpolate::InterpolateJitExecutor::InterpolateJitExecutor(const InterpolateAttrs& interpAttrs, const VectorDims &srcDims, const VectorDims &dstDims, const std::vector &dataScales, @@ -3353,10 +3356,10 @@ MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const Inte InterpolateExecutor(interpAttrs, srcDims, dstDims, dataScales) { auto jcp = jit_interpolate_config_params(); jcp.mode = mode; - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(interpAttrs.inPrc); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(interpAttrs.outPrc); - jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); - jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); + jcp.src_dt = DnnlExtensionUtils::IEPrecisionToDataType(interpAttrs.inPrc); + jcp.dst_dt = DnnlExtensionUtils::IEPrecisionToDataType(interpAttrs.outPrc); + jcp.src_data_size = DnnlExtensionUtils::sizeOfDataType(jcp.src_dt); + jcp.dst_data_size = DnnlExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.indices_size = sizeof(int); jcp.OW = dstDim5d[4]; jcp.OH = dstDim5d[3]; @@ -3387,7 +3390,7 @@ MKLDNNInterpolateNode::InterpolateJitExecutor::InterpolateJitExecutor(const Inte } } -void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) { +void Interpolate::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) { size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4]; size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4]; @@ -3425,7 +3428,7 @@ void MKLDNNInterpolateNode::InterpolateJitExecutor::exec(const uint8_t *in_ptr_, } } -void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) { +void Interpolate::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, uint8_t *out_ptr_, const void *post_ops_data_) { size_t N = srcDimPad5d[0], C = srcDimPad5d[1], ID = srcDimPad5d[2], IH = srcDimPad5d[3], IW = srcDimPad5d[4]; size_t OD = dstDim5d[2], OH = dstDim5d[3], OW = dstDim5d[4]; @@ -3458,7 +3461,7 @@ void MKLDNNInterpolateNode::InterpolateRefExecutor::exec(const uint8_t *in_ptr_, } } -size_t MKLDNNInterpolateNode::getSpatialDimsNum(const Dim rank) { +size_t Interpolate::getSpatialDimsNum(const Dim rank) { switch (rank) { case 1: 
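// A minimal standalone sketch of the coordinate transformation that
// Interpolate::InterpolateExecutor::coordTransToInput performs above: scale is
// outShape / inShape and the output coordinate is divided by it (the ONNX-style
// formulation the comment in the diff insists on). Enum and function names are
// local to this sketch; the set of modes follows the ONNX Interpolate definition.
enum class CoordMode { half_pixel, pytorch_half_pixel, asymmetric, tf_half_pixel_for_nn, align_corners };

float toInputCoord(int outCoord, float scale, int inShape, int outShape, CoordMode mode) {
    switch (mode) {
    case CoordMode::half_pixel:
        return (outCoord + 0.5f) / scale - 0.5f;
    case CoordMode::pytorch_half_pixel:
        return outShape > 1 ? (outCoord + 0.5f) / scale - 0.5f : 0.0f;
    case CoordMode::asymmetric:
        return outCoord / scale;
    case CoordMode::tf_half_pixel_for_nn:
        return (outCoord + 0.5f) / scale;
    case CoordMode::align_corners:
        return outShape > 1 ? outCoord * float(inShape - 1) / float(outShape - 1) : 0.0f;
    }
    return 0.0f;  // unreachable; keeps compilers quiet
}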
case 3: @@ -3473,7 +3476,7 @@ size_t MKLDNNInterpolateNode::getSpatialDimsNum(const Dim rank) { } } -bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const { +bool Interpolate::canFuse(const NodePtr& node) const { if (!mayiuse(cpu::x64::sse41) || interpAttrs.mode == InterpolateMode::linear) { return false; } @@ -3481,8 +3484,10 @@ bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const { return canFuseSimpleOperation(node); } -bool MKLDNNInterpolateNode::created() const { - return getType() == Interpolate; +bool Interpolate::created() const { + return getType() == Type::Interpolate; } -REG_MKLDNN_PRIM_FOR(MKLDNNInterpolateNode, Interpolate); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.h b/src/plugins/intel_cpu/src/nodes/interpolate.h index aeb3daa8547..24fd4292b15 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.h +++ b/src/plugins/intel_cpu/src/nodes/interpolate.h @@ -16,6 +16,7 @@ using namespace InferenceEngine; namespace ov { namespace intel_cpu { +namespace node { enum InterpolateLayoutType { planar, @@ -92,9 +93,9 @@ struct jit_uni_interpolate_kernel { }; -class MKLDNNInterpolateNode : public MKLDNNNode { +class Interpolate : public Node { public: - MKLDNNInterpolateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Interpolate(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -105,7 +106,7 @@ public: bool canBeInPlace() const override { return false; } - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -259,5 +260,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/list.hpp b/src/plugins/intel_cpu/src/nodes/list.hpp deleted file mode 100644 index 34d1b4fec04..00000000000 --- a/src/plugins/intel_cpu/src/nodes/list.hpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include - -#include -#include -#include -#include -#include - -namespace InferenceEngine { - -class ILayerImplFactory { -public: - /** - * @brief A shared pointer to the ILayerImplFactory interface - */ - using Ptr = std::shared_ptr; - - using ImplCreator = std::function; - - /** - * @brief Destructor - */ - virtual ~ILayerImplFactory() = default; - - /** - * @brief Gets all possible implementations for the given cnn Layer - * - * @param impls the vector with implementations which is ordered by priority - * @param resp response descriptor - * @return status code - */ - virtual StatusCode getImplementations(std::vector& impls, ResponseDesc* resp) noexcept = 0; -}; - -namespace Extensions { -namespace Cpu { - -// TODO: remove this -class MKLDNNExtensions : public IExtension { -public: - MKLDNNExtensions(); - - virtual StatusCode - getFactoryFor(ILayerImplFactory*& factory, const std::shared_ptr& op, ResponseDesc* resp) noexcept { - return NOT_FOUND; - } - - void GetVersion(const InferenceEngine::Version*& versionInfo) const noexcept override { - static Version ExtensionDescription = { - { 2, 1 }, // extension API version - "2.1", - "ie-cpu-ext" // extension description message - }; - - 
versionInfo = &ExtensionDescription; - } - - void Unload() noexcept override {} -}; - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp index 1da6111c179..a71a99f0851 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp @@ -8,10 +8,13 @@ #include "ie_parallel.hpp" #include "log_softmax.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNLogSoftmaxNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool LogSoftmax::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto logSoftMax = std::dynamic_pointer_cast(op); if (!logSoftMax) { @@ -24,8 +27,8 @@ bool MKLDNNLogSoftmaxNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +LogSoftmax::LogSoftmax(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -51,7 +54,7 @@ MKLDNNLogSoftmaxNode::MKLDNNLogSoftmaxNode(const std::shared_ptr& IE_THROW() << errorPrefix << " has incorrect input parameters dimensions and axis number!"; } -void MKLDNNLogSoftmaxNode::initSupportedPrimitiveDescriptors() { +void LogSoftmax::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -60,7 +63,7 @@ void MKLDNNLogSoftmaxNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNLogSoftmaxNode::prepareParams() { +void LogSoftmax::prepareParams() { const auto &dims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); reducedAxisStride = 1; axisStep = 1; @@ -79,11 +82,11 @@ void MKLDNNLogSoftmaxNode::prepareParams() { reducedAxisStride *= dims[i]; } -void MKLDNNLogSoftmaxNode::executeDynamicImpl(mkldnn::stream strm) { +void LogSoftmax::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNLogSoftmaxNode::execute(mkldnn::stream strm) { +void LogSoftmax::execute(mkldnn::stream strm) { const float *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); @@ -123,8 +126,10 @@ void MKLDNNLogSoftmaxNode::execute(mkldnn::stream strm) { } } -bool MKLDNNLogSoftmaxNode::created() const { - return getType() == LogSoftmax; +bool LogSoftmax::created() const { + return getType() == Type::LogSoftmax; } -REG_MKLDNN_PRIM_FOR(MKLDNNLogSoftmaxNode, LogSoftmax) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.h b/src/plugins/intel_cpu/src/nodes/log_softmax.h index b21e65b100c..6fb057a03b3 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.h +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.h @@ -9,11 +9,12 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNLogSoftmaxNode : public MKLDNNNode { +class LogSoftmax : public Node { public: - MKLDNNLogSoftmaxNode(const std::shared_ptr& op, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + LogSoftmax(const std::shared_ptr& op, + const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() 
override {}; void initSupportedPrimitiveDescriptors() override; @@ -35,5 +36,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index af8156f495c..427ca5a08a8 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -4,16 +4,19 @@ #include "lrn.h" #include -#include +#include #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct LrnKey { DnnlMemoryDescCPtr inp0; impl_desc_type implType; @@ -56,7 +59,7 @@ bool LrnKey::operator==(const LrnKey &rhs) const { } } // namespace -bool MKLDNNLrnNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Lrn::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto lrn = ngraph::as_type_ptr(op); if (!lrn) { @@ -102,8 +105,8 @@ bool MKLDNNLrnNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +Lrn::Lrn(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "LRN node with name '" + getName() + "'"; @@ -121,7 +124,7 @@ MKLDNNLrnNode::MKLDNNLrnNode(const std::shared_ptr& op, const mkld } } -void MKLDNNLrnNode::getSupportedDescriptors() { +void Lrn::getSupportedDescriptors() { if (!descs.empty()) return; @@ -133,7 +136,7 @@ void MKLDNNLrnNode::getSupportedDescriptors() { InferenceEngine::Precision precision = getOriginalOutputPrecisionAtPort(0); if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(precision); const auto &parentShape = getInputShapeAtPort(0); @@ -143,18 +146,18 @@ void MKLDNNLrnNode::getSupportedDescriptors() { } } -std::shared_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr Lrn::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx > 0) { return std::make_shared(getOriginalInputPrecisionAtPort(idx), getInputShapeAtPort(idx)); } else { if (getInputShapeAtPort(idx).isDynamic()) { - return MKLDNNExtensionUtils::makeUndefinedDesc(primitive_desc_it.src_desc(idx), getInputShapeAtPort(idx)); + return DnnlExtensionUtils::makeUndefinedDesc(primitive_desc_it.src_desc(idx), getInputShapeAtPort(idx)); } - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx)); } } -void MKLDNNLrnNode::prepareParams() { +void Lrn::prepareParams() { auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -172,7 +175,7 @@ void MKLDNNLrnNode::prepareParams() { auto engine = getEngine(); auto builder = [&engine](const LrnKey& key) -> std::shared_ptr { - MKLDNNDescriptor desc(std::shared_ptr( + DnnlDesriptor desc(std::shared_ptr( new 
mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, key.alg, key.inp0->getDnnlDesc(), key.size, key.alpha, key.beta, key.k))); mkldnn::lrn_forward::primitive_desc prim_desc; @@ -201,27 +204,29 @@ void MKLDNNLrnNode::prepareParams() { primArgs = { {DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst} }; } -bool MKLDNNLrnNode::created() const { - return getType() == Lrn; +bool Lrn::created() const { + return getType() == Type::Lrn; } -void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, +void Lrn::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { auto inpDesc = inputDesc[0]->isDefined() ? inputDesc[0] : MemoryDescUtils::makeDummyDesc(*inputDesc[0]); DnnlMemoryDescPtr definedInpMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inpDesc); const auto& in_candidate = definedInpMemDesc->getDnnlDesc(); - MKLDNNDescriptor desc(std::shared_ptr( + DnnlDesriptor desc(std::shared_ptr( new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, in_candidate, size, alpha, beta, k))); descs.push_back(desc); } -std::vector MKLDNNLrnNode::shapeInfer() const { +std::vector Lrn::shapeInfer() const { return { getParentEdgesAtPort(0).front()->getMemory().getStaticDims() }; } -void MKLDNNLrnNode::executeDynamicImpl(mkldnn::stream strm) { +void Lrn::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -REG_MKLDNN_PRIM_FOR(MKLDNNLrnNode, Lrn); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/lrn.h b/src/plugins/intel_cpu/src/nodes/lrn.h index 424922d9496..39a72837025 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.h +++ b/src/plugins/intel_cpu/src/nodes/lrn.h @@ -12,15 +12,16 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNLrnNode : public MKLDNNNode { +class Lrn : public Node { public: - MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Lrn(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; - size_t descInputNumbers(MKLDNNDescriptor desc) override { + size_t descInputNumbers(DnnlDesriptor desc) override { return static_cast(getOriginalInputsNumber()); } std::shared_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; @@ -45,5 +46,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.cpp b/src/plugins/intel_cpu/src/nodes/mathematics.cpp index 8e98a89dcf1..121ec938148 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.cpp +++ b/src/plugins/intel_cpu/src/nodes/mathematics.cpp @@ -11,17 +11,20 @@ #include "mathematics.h" #include "utils/general_utils.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNMathNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Math::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (initializers.find(op->get_type_info()) == initializers.end()) { errorMessage = "Unsupported Math layer type."; return false; } - if (ov::intel_cpu::one_of(op->get_type_info(), ngraph::op::v0::HardSigmoid::get_type_info_static(), ngraph::op::v0::Selu::get_type_info_static())) { + if (one_of(op->get_type_info(), 
ngraph::op::v0::HardSigmoid::get_type_info_static(), ngraph::op::v0::Selu::get_type_info_static())) { auto firstConst = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); auto secondConst = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2)); if (!firstConst || !secondConst) { @@ -35,8 +38,8 @@ bool MKLDNNMathNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), alpha(0.f), beta(0.f), gamma(0.f) { +Math::Math(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache), alpha(0.f), beta(0.f), gamma(0.f) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -45,7 +48,7 @@ MKLDNNMathNode::MKLDNNMathNode(const std::shared_ptr& op, const mk initializers[op->get_type_info()](op, *this); } -void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { +void Math::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -59,98 +62,98 @@ void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -std::vector MKLDNNMathNode::shapeInfer() const { +std::vector Math::shapeInfer() const { return std::vector{getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()}; } -void MKLDNNMathNode::executeDynamicImpl(mkldnn::stream strm) { +void Math::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNMathNode::execute(mkldnn::stream strm) { +void Math::execute(mkldnn::stream strm) { size_t dataSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); switch (getAlgorithm()) { - case ov::intel_cpu::MathAbs: + case Algorithm::MathAbs: parallel_for(dataSize, [&](size_t i) { dst_data[i] = (std::abs)(src_data[i]); }); break; - case ov::intel_cpu::MathAcos: + case Algorithm::MathAcos: parallel_for(dataSize, [&](size_t i) { dst_data[i] = acosf(src_data[i]); }); break; - case ov::intel_cpu::MathAcosh: + case Algorithm::MathAcosh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = acoshf(src_data[i]); }); break; - case ov::intel_cpu::MathAsin: + case Algorithm::MathAsin: parallel_for(dataSize, [&](size_t i) { dst_data[i] = asinf(src_data[i]); }); break; - case ov::intel_cpu::MathAsinh: + case Algorithm::MathAsinh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = asinhf(src_data[i]); }); break; - case ov::intel_cpu::MathAtan: + case Algorithm::MathAtan: parallel_for(dataSize, [&](size_t i) { dst_data[i] = atanf(src_data[i]); }); break; - case ov::intel_cpu::MathAtanh: + case Algorithm::MathAtanh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = atanhf(src_data[i]); }); break; - case ov::intel_cpu::MathCeiling: + case Algorithm::MathCeiling: parallel_for(dataSize, [&](size_t i) { dst_data[i] = ceilf(src_data[i]); }); break; - case ov::intel_cpu::MathCos: + case Algorithm::MathCos: parallel_for(dataSize, [&](size_t i) { dst_data[i] = cosf(src_data[i]); }); break; - case ov::intel_cpu::MathCosh: + case Algorithm::MathCosh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = coshf(src_data[i]); }); break; - case ov::intel_cpu::MathFloor: + case Algorithm::MathFloor: parallel_for(dataSize, [&](size_t i) { dst_data[i] = floorf(src_data[i]); }); break; - case ov::intel_cpu::MathHardSigmoid: + case Algorithm::MathHardSigmoid: alpha = (alpha == 
0.0f) ? 0.2f : alpha; beta = (beta == 0.0f) ? 0.5f : beta; parallel_for(dataSize, [&](size_t i) { dst_data[i] = (std::max)(0.f, (std::min)(1.f, alpha * src_data[i] + beta)); }); break; - case ov::intel_cpu::MathLog: + case Algorithm::MathLog: parallel_for(dataSize, [&](size_t i) { dst_data[i] = logf(src_data[i]); }); break; - case ov::intel_cpu::MathNegative: + case Algorithm::MathNegative: parallel_for(dataSize, [&](size_t i) { dst_data[i] = -src_data[i]; }); break; - case ov::intel_cpu::MathReciprocal: + case Algorithm::MathReciprocal: parallel_for(dataSize, [&](size_t i) { dst_data[i] = 1.0f / src_data[i]; }); break; - case ov::intel_cpu::MathSelu: + case Algorithm::MathSelu: alpha = (alpha == 0.0f) ? 1.67326f : alpha; gamma = (gamma == 0.0f) ? 1.0507f : gamma; parallel_for(dataSize, [&](size_t i) { @@ -158,7 +161,7 @@ void MKLDNNMathNode::execute(mkldnn::stream strm) { dst_data[i] = (x > 0.0f) ? (gamma * x) : (gamma * alpha * (exp(x) - 1.0f)); }); break; - case ov::intel_cpu::MathSign: + case Algorithm::MathSign: parallel_for(dataSize, [&](size_t i) { if (src_data[i] > 0.0f) dst_data[i] = 1.0f; @@ -168,28 +171,28 @@ void MKLDNNMathNode::execute(mkldnn::stream strm) { dst_data[i] = 0.0f; }); break; - case ov::intel_cpu::MathSin: + case Algorithm::MathSin: parallel_for(dataSize, [&](size_t i) { dst_data[i] = sinf(src_data[i]); }); break; - case ov::intel_cpu::MathSinh: + case Algorithm::MathSinh: parallel_for(dataSize, [&](size_t i) { dst_data[i] = sinhf(src_data[i]); }); break; - case ov::intel_cpu::MathSoftPlus: + case Algorithm::MathSoftPlus: parallel_for(dataSize, [&](size_t i) { dst_data[i] = logf(expf(src_data[i]) + 1); }); break; - case ov::intel_cpu::MathSoftsign: + case Algorithm::MathSoftsign: parallel_for(dataSize, [&](size_t i) { float x = src_data[i]; dst_data[i] = x / (1.f + (std::abs)(x)); }); break; - case ov::intel_cpu::MathTan: + case Algorithm::MathTan: parallel_for(dataSize, [&](size_t i) { dst_data[i] = tanf(src_data[i]); }); @@ -199,75 +202,77 @@ void MKLDNNMathNode::execute(mkldnn::stream strm) { } } -bool MKLDNNMathNode::created() const { - return getType() == Math; +bool Math::created() const { + return getType() == Type::Math; } -std::map&, MKLDNNMathNode& node)>> MKLDNNMathNode::initializers { - {ngraph::op::v0::Abs::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathAbs; +std::map&, Math& node)>> Math::initializers { + {ngraph::op::v0::Abs::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAbs; }}, - {ngraph::op::v0::Acos::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathAcos; + {ngraph::op::v0::Acos::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAcos; }}, - {ngraph::op::v3::Acosh::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathAcosh; + {ngraph::op::v3::Acosh::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAcosh; }}, - {ngraph::op::v0::Asin::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathAsin; + {ngraph::op::v0::Asin::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAsin; }}, - {ngraph::op::v3::Asinh::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - 
node.algorithm = ov::intel_cpu::MathAsinh; + {ngraph::op::v3::Asinh::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAsinh; }}, - {ngraph::op::v0::Atan::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathAtan; + {ngraph::op::v0::Atan::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAtan; }}, - {ngraph::op::v0::Ceiling::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathCeiling; + {ngraph::op::v0::Ceiling::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathCeiling; }}, - {ngraph::op::v0::Cos::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathCos; + {ngraph::op::v0::Cos::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathCos; }}, - {ngraph::op::v0::Cosh::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathCosh; + {ngraph::op::v0::Cosh::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathCosh; }}, - {ngraph::op::v0::Floor::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathFloor; + {ngraph::op::v0::Floor::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathFloor; }}, - {ngraph::op::v0::HardSigmoid::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathHardSigmoid; + {ngraph::op::v0::HardSigmoid::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathHardSigmoid; node.alpha = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1))->cast_vector()[0]; node.beta = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2))->cast_vector()[0]; }}, - {ngraph::op::v0::Log::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathLog; + {ngraph::op::v0::Log::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathLog; }}, - {ngraph::op::v0::Negative::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathNegative; + {ngraph::op::v0::Negative::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathNegative; }}, - {ngraph::op::v0::Selu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathSelu; + {ngraph::op::v0::Selu::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathSelu; node.alpha = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1))->cast_vector()[0]; node.gamma = ngraph::as_type_ptr(op->get_input_node_shared_ptr(2))->cast_vector()[0]; }}, - {ngraph::op::v0::Sign::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathSign; + {ngraph::op::v0::Sign::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathSign; }}, - {ngraph::op::v0::Sin::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = 
ov::intel_cpu::MathSin; + {ngraph::op::v0::Sin::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathSin; }}, - {ngraph::op::v0::Sinh::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathSinh; + {ngraph::op::v0::Sinh::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathSinh; }}, - {ngraph::op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathSoftPlus; + {ngraph::op::v4::SoftPlus::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathSoftPlus; }}, - {ngraph::op::v0::Tan::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathTan; + {ngraph::op::v0::Tan::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathTan; }}, - {ngraph::op::v3::Atanh::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNMathNode& node) { - node.algorithm = ov::intel_cpu::MathAtanh; + {ngraph::op::v3::Atanh::get_type_info_static(), [](const std::shared_ptr& op, Math& node) { + node.algorithm = Algorithm::MathAtanh; }} }; -REG_MKLDNN_PRIM_FOR(MKLDNNMathNode, Math); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.h b/src/plugins/intel_cpu/src/nodes/mathematics.h index 81620fb870c..cfb38430778 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.h +++ b/src/plugins/intel_cpu/src/nodes/mathematics.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNMathNode : public MKLDNNNode { +class Math : public Node { public: - MKLDNNMathNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Math(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -26,7 +27,7 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: - static std::map&, MKLDNNMathNode& node)>> initializers; + static std::map&, Math& node)>> initializers; float alpha = 0.0f; float beta = 0.0f; @@ -35,5 +36,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 4152f919ecc..74aa817a11b 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -19,14 +19,17 @@ #include "fake_quantize.h" #include "utils/general_utils.h" #include "memory_desc/cpu_memory_desc_utils.h" -#include +#include #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct MatMulKey { DnnlMemoryDescCPtr inp0; DnnlMemoryDescCPtr inp1; @@ -80,7 +83,7 @@ bool canBeExecutedInInt8(const Precision& firstInput, const Precision& secondInp } } // namespace -bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MatMul::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto matMul = std::dynamic_pointer_cast(op); if (!matMul) { @@ -107,8 +110,8 @@ 
bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache), withBiases(false) { +MatMul::MatMul(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache), withBiases(false) { std::string errorMessage; errorPrefix = "MatMul node with name '" + getName() + "'"; @@ -126,17 +129,22 @@ MKLDNNMatMulNode::MKLDNNMatMulNode(const std::shared_ptr& op, cons transposeIn[1] = matMul->get_transpose_b(); } -bool MKLDNNMatMulNode::canFuse(const MKLDNNNodePtr& node) const { +bool MatMul::canFuse(const NodePtr& node) const { // per channel binary post op for rank > 2D is supported only by oneDNN reference implementation because of unusual MatMul channel axis (issue 6669) if (getOutputShapeAtPort(0).getRank() > 2) { - if (const auto* eltwiseNode = dynamic_cast(node.get())) { - if (one_of(eltwiseNode->getAlgorithm(), - EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd, EltwisePowerStatic) && - eltwiseNode->getBroadcastingPolicy() != MKLDNNEltwiseNode::PerTensor) { + if (const auto* eltwiseNode = dynamic_cast(node.get())) { + if (one_of(eltwiseNode->getAlgorithm(), Algorithm::EltwiseAdd, + Algorithm::EltwiseMultiply, + Algorithm::EltwiseSubtract, + Algorithm::EltwiseDivide, + Algorithm::EltwisePrelu, + Algorithm::EltwiseMulAdd, + Algorithm::EltwisePowerStatic) && + eltwiseNode->getBroadcastingPolicy() != Eltwise::PerTensor) { return false; } - } else if (const auto* fakeQuantizeNode = dynamic_cast(node.get())) { - if (fakeQuantizeNode->getBroadcastingPolicy() != MKLDNNFakeQuantizeNode::PerTensor) { + } else if (const auto* fakeQuantizeNode = dynamic_cast(node.get())) { + if (fakeQuantizeNode->getBroadcastingPolicy() != FakeQuantize::PerTensor) { return false; } } @@ -146,14 +154,14 @@ bool MKLDNNMatMulNode::canFuse(const MKLDNNNodePtr& node) const { // Consider the case when Matmul doesn't support execution in int8, but is getting fused with FQ with int8 output. // Then the Matmul will change its output precision to fp32, but the FQ child will still have the int8 input precision. // This information should be propagated! Note that we may need to propagate updated precision to child fused nodes.
- if (node->getType() == FakeQuantize && + if (node->getType() == Type::FakeQuantize && one_of(node->getOriginalOutputPrecisionAtPort(0), Precision::I8, Precision::U8) && !canBeExecutedInInt8(getOriginalInputPrecisionAtPort(0), getOriginalInputPrecisionAtPort(1))) return false; return canFuseSimpleOperation(node); } -void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights = false) { +void MatMul::setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights = false) { mkldnn::post_ops ops; auto getBinPostOpShape = [&](){ @@ -165,14 +173,14 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims }; for (const auto &node : fusedWith) { - if (auto* eltwiseNode = dynamic_cast(node.get())) { - if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { + if (eltwiseNode->getOneDnnAlgorithm() != mkldnn::algorithm::undef) { eltwiseNode->appendPostOps(ops, dims, postOpsArgs); } else { eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs); } continue; - } else if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + } else if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), postOpsArgs); continue; } @@ -183,7 +191,7 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims attr.set_post_ops(ops); } -MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) { +Node::AttrPtr MatMul::initPrimitiveAttr(const VectorDims &dims) { auto attr = std::make_shared(mkldnn::primitive_attr()); setPostOps(*attr, dims, true); @@ -191,7 +199,7 @@ MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) return attr; } -MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr() { +Node::AttrPtr MatMul::initPrimitiveAttr() { auto dummyShape = MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)); return initPrimitiveAttr(dummyShape.getStaticDims()); } @@ -225,18 +233,18 @@ static VectorDims getStridesAndModifyShape(Shape& shape, const bool transpose) { return strides; } -mkldnn::memory::desc MKLDNNMatMulNode::getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc) { +mkldnn::memory::desc MatMul::getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc) { // oneDNN matmul requires shape for bias desc to be the same rank VectorDims biasDims(outMemDesc->getShape().getRank(), 1); const auto outDims = outMemDesc->getShape().getStaticDims(); const auto chIdx = getFusingAxis(); biasDims[chIdx] = outDims[chIdx]; - const auto bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); + const auto bdt = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); - return mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(biasDims), bdt, memory::format_tag::any); + return mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(biasDims), bdt, memory::format_tag::any); } -void MKLDNNMatMulNode::getSupportedDescriptors() { +void MatMul::getSupportedDescriptors() { if (getParentEdges().size() != getOriginalInputsNumber()) IE_THROW() << errorPrefix << " has incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) @@ -315,7 +323,7 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { createDescriptor({inDataDesc[0], inDataDesc[1]}, {outDataDesc}); } -std::pair MKLDNNMatMulNode::makeDummyInputShapes(const Shape& in0, const Shape& in1) const { 
+std::pair MatMul::makeDummyInputShapes(const Shape& in0, const Shape& in1) const { if (in0.getRank() < 2 || in1.getRank() < 2) { IE_THROW() << "Can't create dummy inputs with rank less 2"; } @@ -389,7 +397,7 @@ std::pair MKLDNNMatMulNode::makeDummyInputShapes(const Shape& in0, return {Shape(inDims0), Shape(inDims1)}; } -void MKLDNNMatMulNode::createDescriptor(const std::vector& inputDesc, +void MatMul::createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) { std::shared_ptr matmul_desc; if (withBiases) { @@ -406,7 +414,7 @@ void MKLDNNMatMulNode::createDescriptor(const std::vector& inputD descs.emplace_back(matmul_desc); } -void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { +void MatMul::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -444,32 +452,32 @@ void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { } } -MemoryDescPtr MKLDNNMatMulNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +MemoryDescPtr MatMul::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1): primitive_desc_it.src_desc(idx); if (idx < 2) // inputs return std::make_shared( - MKLDNNExtensionUtils::DataTypeToIEPrecision(static_cast(desc.data.data_type)), + DnnlExtensionUtils::DataTypeToIEPrecision(static_cast(desc.data.data_type)), getInputShapeAtPort(idx)); /* provide initial shapes, so hide transpose effect */ else // bias - return MKLDNNExtensionUtils::makeDescriptor(desc); + return DnnlExtensionUtils::makeDescriptor(desc); } -bool MKLDNNMatMulNode::created() const { - return getType() == MatMul; +bool MatMul::created() const { + return getType() == Type::MatMul; } -size_t MKLDNNMatMulNode::getMaxBatch() const { +size_t MatMul::getMaxBatch() const { if (!outputShapes.empty()) return outputShapes[0].getStaticDims()[0]; return 0; } -InferenceEngine::Precision MKLDNNMatMulNode::getRuntimePrecision() const { +InferenceEngine::Precision MatMul::getRuntimePrecision() const { return getMaxPrecision(getInputPrecisions()); } -void MKLDNNMatMulNode::prepareParams() { +void MatMul::prepareParams() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& src0MemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto& src1MemPtr = getParentEdgeAt(1)->getMemoryPtr(); @@ -535,7 +543,7 @@ void MKLDNNMatMulNode::prepareParams() { key.out->getDnnlDesc())); } - MKLDNNDescriptor desc(matmul_desc); + DnnlDesriptor desc(matmul_desc); primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine, key.attr); matmul::primitive_desc prim_desc; @@ -570,11 +578,11 @@ void MKLDNNMatMulNode::prepareParams() { appendPostOpArgs(*attr, primArgs, postOpsArgs); } -void MKLDNNMatMulNode::executeDynamicImpl(dnnl::stream strm) { - MKLDNNNode::execute(strm); +void MatMul::executeDynamicImpl(dnnl::stream strm) { + Node::execute(strm); } -const std::vector& MKLDNNMatMulNode::getPrimitivesPriority() { +const std::vector& MatMul::getPrimitivesPriority() { std::vector priorities = { impl_desc_type::unknown, impl_desc_type::brgemm_avx512_amx, @@ -611,4 +619,6 @@ const std::vector& MKLDNNMatMulNode::getPrimitivesPriority() { return implPriorities; } -REG_MKLDNN_PRIM_FOR(MKLDNNMatMulNode, MatMul); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/matmul.h b/src/plugins/intel_cpu/src/nodes/matmul.h index 775f1451fa2..cb6aa05538e 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.h +++ 
b/src/plugins/intel_cpu/src/nodes/matmul.h @@ -13,22 +13,23 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNMatMulNode : public MKLDNNNode { +class MatMul : public Node { public: - MKLDNNMatMulNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MatMul(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; void initSupportedPrimitiveDescriptors() override; MemoryDescPtr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; bool created() const override; size_t getMaxBatch() const override; InferenceEngine::Precision getRuntimePrecision() const override; - size_t descInputNumbers(MKLDNNDescriptor desc) override { + size_t descInputNumbers(DnnlDesriptor desc) override { return getOriginalInputsNumber(); } @@ -63,5 +64,6 @@ private: DnnlBlockedMemoryDescPtr outDataDesc; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp index dab204012dd..83c501ad863 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp @@ -14,13 +14,16 @@ #include "ngraph/opsets/opset8.hpp" #include "utils/general_utils.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { + using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; using ngNmseDcayFunction = ngraph::op::v8::MatrixNms::DecayFunction; -bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MatrixNms::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto nms = std::dynamic_pointer_cast(op); if (!nms) { @@ -44,8 +47,8 @@ bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) - : MKLDNNNode(op, eng, cache) { +MatrixNms::MatrixNms(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr& cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -103,7 +106,7 @@ MKLDNNMatrixNmsNode::MKLDNNMatrixNmsNode(const std::shared_ptr& op IE_THROW() << m_errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); } -void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() { +void MatrixNms::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -125,8 +128,8 @@ void MKLDNNMatrixNmsNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -bool MKLDNNMatrixNmsNode::created() const { - return getType() == MatrixNms; +bool MatrixNms::created() const { + return getType() == Type::MatrixNms; } namespace { @@ -164,7 +167,7 @@ static inline float intersectionOverUnion(const float* bbox1, const float* bbox2 } } // namespace -size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx) { +size_t MatrixNms::nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const 
int64_t batchIdx, const int64_t classIdx) { std::vector candidateIndex(m_numBoxes); std::iota(candidateIndex.begin(), candidateIndex.end(), 0); auto end = std::remove_if(candidateIndex.begin(), candidateIndex.end(), [&scoresData, this](int32_t idx) { @@ -240,7 +243,7 @@ size_t MKLDNNMatrixNmsNode::nmsMatrix(const float* boxesData, const float* score return numDet; } -void MKLDNNMatrixNmsNode::prepareParams() { +void MatrixNms::prepareParams() { const auto& boxes_dims = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims(); const auto& scores_dims = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims(); if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) { @@ -281,11 +284,11 @@ void MKLDNNMatrixNmsNode::prepareParams() { } } -bool MKLDNNMatrixNmsNode::isExecutable() const { - return isDynamicNode() || MKLDNNNode::isExecutable(); +bool MatrixNms::isExecutable() const { + return isDynamicNode() || Node::isExecutable(); } -void MKLDNNMatrixNmsNode::executeDynamicImpl(mkldnn::stream strm) { +void MatrixNms::executeDynamicImpl(mkldnn::stream strm) { if (hasEmptyInputTensors()) { redefineOutputMemory({{0, 6}, {0, 1}, {0}}); return; @@ -293,7 +296,7 @@ void MKLDNNMatrixNmsNode::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) { +void MatrixNms::execute(mkldnn::stream strm) { const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); @@ -405,9 +408,11 @@ void MKLDNNMatrixNmsNode::execute(mkldnn::stream strm) { } } -void MKLDNNMatrixNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { +void MatrixNms::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { if (std::find(precList.begin(), precList.end(), prec) == precList.end()) IE_THROW() << m_errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; } -REG_MKLDNN_PRIM_FOR(MKLDNNMatrixNmsNode, MatrixNms); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.h b/src/plugins/intel_cpu/src/nodes/matrix_nms.h index 2041caa5dcf..604431d5def 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.h +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.h @@ -13,6 +13,7 @@ namespace ov { namespace intel_cpu { +namespace node { enum class MatrixNmsSortResultType { CLASSID, // sort selected boxes by class id (ascending) in each batch element @@ -22,9 +23,9 @@ enum class MatrixNmsSortResultType { enum MatrixNmsDecayFunction { GAUSSIAN, LINEAR }; -class MKLDNNMatrixNmsNode : public MKLDNNNode { +class MatrixNms : public Node { public: - MKLDNNMatrixNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + MatrixNms(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr& cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -103,5 +104,6 @@ private: size_t nmsMatrix(const float* boxesData, const float* scoresData, BoxInfo* filterBoxes, const int64_t batchIdx, const int64_t classIdx); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 27d43818922..d78fc818a5f 100644 --- 
a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include "memory.hpp" #include "common/cpu_memcpy.h" #include "utils/general_utils.h" @@ -12,12 +12,15 @@ #include "utils/ngraph_utils.hpp" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -std::mutex MKLDNNMemoryNodeVirtualEdge::holderMutex; +namespace ov { +namespace intel_cpu { +namespace node { -MKLDNNMemoryNode::MKLDNNMemoryNode(const std::shared_ptr& op) { +std::mutex MemoryNodeVirtualEdge::holderMutex; + +MemoryNode::MemoryNode(const std::shared_ptr& op) { if (auto assignOp = std::dynamic_pointer_cast(op)) { _id = assignOp->get_variable_id(); } else if (auto readValueOp = std::dynamic_pointer_cast(op)) { @@ -25,14 +28,14 @@ MKLDNNMemoryNode::MKLDNNMemoryNode(const std::shared_ptr& op) { } } -bool MKLDNNMemoryOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MemoryOutput::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; return false; } - if (!ov::intel_cpu::one_of(op->get_type_info(), + if (!one_of(op->get_type_info(), ngraph::op::v3::Assign::get_type_info_static(), ngraph::op::v6::Assign::get_type_info_static())) { errorMessage = "Node is not an instance of Assign from the operation set v3 or v6."; @@ -44,24 +47,24 @@ bool MKLDNNMemoryOutputNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) , MKLDNNMemoryNode(op) { +MemoryOutput::MemoryOutput(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) , MemoryNode(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } if (created()) { - holder = MKLDNNMemoryNodeVirtualEdge::registerOutput(this); + holder = MemoryNodeVirtualEdge::registerOutput(this); } } -MKLDNNMemoryOutputNode::~MKLDNNMemoryOutputNode() { - MKLDNNMemoryNodeVirtualEdge::remove(this, holder); +MemoryOutput::~MemoryOutput() { + MemoryNodeVirtualEdge::remove(this, holder); } -void MKLDNNMemoryOutputNode::getSupportedDescriptors() {} +void MemoryOutput::getSupportedDescriptors() {} -void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { +void MemoryOutput::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -75,22 +78,22 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } -void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { +void MemoryOutput::execute(mkldnn::stream strm) { auto& srcMemory = getParentEdgeAt(0)->getMemory(); - auto inputMemoryNode = dynamic_cast(inputNode); + auto inputMemoryNode = dynamic_cast(inputNode); IE_ASSERT(inputMemoryNode != nullptr); inputMemoryNode->storeState(srcMemory); } -bool MKLDNNMemoryInputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MemoryInput::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; return false; } - if (!ov::intel_cpu::one_of(op->get_type_info(), + if (!one_of(op->get_type_info(), 
ngraph::op::v3::ReadValue::get_type_info_static(), ngraph::op::v6::ReadValue::get_type_info_static())) { errorMessage = "Node is not an instance of ReadValue from the operation set v3 or v6."; @@ -102,19 +105,19 @@ bool MKLDNNMemoryInputNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNInputNode(op, eng, cache), MKLDNNMemoryNode(op), dataStore(new MKLDNNMemory{eng}) { +MemoryInput::MemoryInput(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Input(op, eng, cache), MemoryNode(op), dataStore(new Memory{eng}) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } if (created()) { - holder = MKLDNNMemoryNodeVirtualEdge::registerInput(this); + holder = MemoryNodeVirtualEdge::registerInput(this); } } -void MKLDNNMemoryInputNode::createPrimitive() { - MKLDNNInputNode::createPrimitive(); +void MemoryInput::createPrimitive() { + Input::createPrimitive(); dataStore->Create(getChildEdgeAt(0)->getMemory().getDesc()); @@ -130,46 +133,46 @@ void MKLDNNMemoryInputNode::createPrimitive() { * @param src source memory object */ inline -static void simple_copy(const MKLDNNMemory& dst, const MKLDNNMemory& src) { +static void simple_copy(const Memory& dst, const Memory& src) { auto srcPtr = static_cast(src.GetPtr()); auto dstPtr = static_cast(dst.GetPtr()); auto srcSizeInByte = src.GetSize(); auto dstSizeInByte = dst.GetSize(); - IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible. Has different sizes."; + IE_ASSERT(srcSizeInByte == dstSizeInByte) << "Memory objects are not compatible: they have different sizes."; cpu_memcpy(dstPtr, srcPtr, srcSizeInByte); } -MKLDNNMemoryInputNode::~MKLDNNMemoryInputNode() { - MKLDNNMemoryNodeVirtualEdge::remove(this, holder); +MemoryInput::~MemoryInput() { + MemoryNodeVirtualEdge::remove(this, holder); } -MKLDNNMemoryPtr MKLDNNMemoryInputNode::getStore() { +MemoryPtr MemoryInput::getStore() { return dataStore; } -void MKLDNNMemoryInputNode::storeState(const MKLDNNMemory &new_state) { +void MemoryInput::storeState(const Memory &new_state) { // TODO: Should be the following call: // dataStore.SetData(new_state, false); // But for performance reasons we use a simple manual copy simple_copy(*dataStore, new_state); } -void MKLDNNMemoryInputNode::execute(mkldnn::stream strm) { +void MemoryInput::execute(mkldnn::stream strm) { // TODO: Should be a simple call of: // dst_mem.SetData(dataStore, false); // But for performance reasons we use a simple manual copy simple_copy(getChildEdgeAt(0)->getMemory(), *dataStore); } -MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerInput(MKLDNNMemoryInputNode * node) { - std::lock_guard lock{MKLDNNMemoryNodeVirtualEdge::holderMutex}; +MemoryNodeVirtualEdge::Holder* MemoryNodeVirtualEdge::registerInput(MemoryInput * node) { + std::lock_guard lock{MemoryNodeVirtualEdge::holderMutex}; // in case of output already registered - auto& holder = MKLDNNMemoryNodeVirtualEdge::getExisted(); - auto sibling = MKLDNNMemoryNodeVirtualEdge::getByName(holder, node->getId()); + auto& holder = MemoryNodeVirtualEdge::getExisted(); + auto sibling = MemoryNodeVirtualEdge::getByName(holder, node->getId()); if (sibling != nullptr) { - auto outputNode = dynamic_cast(sibling); + auto outputNode = dynamic_cast(sibling); IE_ASSERT(outputNode != nullptr); outputNode->setInputNode(node); } else { @@ -178,13 +181,13 @@ MKLDNNMemoryNodeVirtualEdge::Holder*
MKLDNNMemoryNodeVirtualEdge::registerInput( return &holder; } -MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerOutput(MKLDNNMemoryOutputNode * node) { - std::lock_guard lock{MKLDNNMemoryNodeVirtualEdge::holderMutex}; +MemoryNodeVirtualEdge::Holder* MemoryNodeVirtualEdge::registerOutput(MemoryOutput * node) { + std::lock_guard lock{MemoryNodeVirtualEdge::holderMutex}; // in case of output layer - auto& holder = MKLDNNMemoryNodeVirtualEdge::getExisted(); - auto sibling = MKLDNNMemoryNodeVirtualEdge::getByName(holder, node->getId()); + auto& holder = MemoryNodeVirtualEdge::getExisted(); + auto sibling = MemoryNodeVirtualEdge::getByName(holder, node->getId()); if (sibling != nullptr) { - auto inputNode = dynamic_cast(sibling); + auto inputNode = dynamic_cast(sibling); IE_ASSERT(inputNode != nullptr); node->setInputNode(inputNode); } else { @@ -193,8 +196,8 @@ MKLDNNMemoryNodeVirtualEdge::Holder* MKLDNNMemoryNodeVirtualEdge::registerOutput return &holder; } -void MKLDNNMemoryNodeVirtualEdge::remove(MKLDNNMemoryNode * node, Holder* holder) { - std::lock_guard lock{MKLDNNMemoryNodeVirtualEdge::holderMutex}; +void MemoryNodeVirtualEdge::remove(MemoryNode * node, Holder* holder) { + std::lock_guard lock{MemoryNodeVirtualEdge::holderMutex}; if (nullptr != holder) { InferenceEngine::details::erase_if(*holder, [&](const Holder::value_type & it){ return it.second == node; @@ -202,5 +205,6 @@ void MKLDNNMemoryNodeVirtualEdge::remove(MKLDNNMemoryNode * node, Holder* holder } } -REG_MKLDNN_PRIM_FOR(MKLDNNMemoryInputNode, MemoryInput); -REG_MKLDNN_PRIM_FOR(MKLDNNMemoryOutputNode, MemoryOutput); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index 4ad9f2c3fc7..2afa724481e 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "ie_algorithm.hpp" #include "input.h" #include @@ -14,35 +15,37 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNMemoryNode { +class MemoryNode { std::string _id; public: - explicit MKLDNNMemoryNode(std::string id) : _id(id) {} - explicit MKLDNNMemoryNode(const std::shared_ptr& op); - virtual ~MKLDNNMemoryNode() = default; + explicit MemoryNode(std::string id) : _id(id) {} + explicit MemoryNode(const std::shared_ptr& op); + virtual ~MemoryNode() = default; std::string getId() { return _id; } - virtual void setInputNode(MKLDNNNode *) = 0; + virtual void setInputNode(Node *) = 0; }; -class MKLDNNMemoryOutputNode; -class MKLDNNMemoryInputNode; + +class MemoryOutput; +class MemoryInput; /** * @brief * TODO: ATTENTION: this is a temporary solution, this connection should be kept in the graph * WARNING: thread_local and holderMutex are not needed if moved into graph */ -class MKLDNNMemoryNodeVirtualEdge { - public: - using Holder = std::map; +class MemoryNodeVirtualEdge { +public: + using Holder = std::map; static Holder & getExisted() { thread_local static Holder existed; return existed; } - static MKLDNNMemoryNode * getByName(Holder& holder, std::string name) { + static MemoryNode * getByName(Holder& holder, std::string name) { auto result = holder.find(name); if (result != holder.end()) { return result->second; @@ -50,26 +53,26 @@ class MKLDNNMemoryNodeVirtualEdge { - static Holder* registerOutput(MKLDNNMemoryOutputNode * node); - static Holder* registerInput(MKLDNNMemoryInputNode * node); - static void
remove(MKLDNNMemoryNode * node, Holder* holder); + static Holder* registerOutput(MemoryOutput * node); + static Holder* registerInput(MemoryInput * node); + static void remove(MemoryNode * node, Holder* holder); static std::mutex holderMutex; }; -class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode { - public: - MKLDNNMemoryOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNMemoryOutputNode() override; +class MemoryOutput : public Node, public MemoryNode { +public: + MemoryOutput(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~MemoryOutput() override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override {} void execute(mkldnn::stream strm) override; bool created() const override { - return getType() == MemoryOutput; + return getType() == Type::MemoryOutput; } - void setInputNode(MKLDNNNode* node) override { + void setInputNode(Node* node) override { inputNode = node; } @@ -77,18 +80,18 @@ class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode { /** * @brief keeps reference to input sibling node */ - MKLDNNNode* inputNode = nullptr; - MKLDNNMemoryNodeVirtualEdge::Holder* holder = nullptr; + Node* inputNode = nullptr; + MemoryNodeVirtualEdge::Holder* holder = nullptr; }; -class MKLDNNMemoryInputNode : public MKLDNNInputNode, public MKLDNNMemoryNode { +class MemoryInput : public Input, public MemoryNode { public: - MKLDNNMemoryInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNMemoryInputNode() override; + MemoryInput(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~MemoryInput() override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool created() const override { - return getType() == MemoryInput; + return getType() == Type::MemoryInput; } bool isExecutable() const override { return true; @@ -97,13 +100,14 @@ public: void createPrimitive() override; - void setInputNode(MKLDNNNode* node) override {} - void storeState(const MKLDNNMemory& mem); - MKLDNNMemoryPtr getStore(); + void setInputNode(Node* node) override {} + void storeState(const Memory& mem); + MemoryPtr getStore(); private: - MKLDNNMemoryPtr dataStore; - MKLDNNMemoryNodeVirtualEdge::Holder* holder = nullptr; + MemoryPtr dataStore; + MemoryNodeVirtualEdge::Holder* holder = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp index f59a7b3a13f..0b46e685d2b 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp @@ -17,12 +17,15 @@ #include "ie_parallel.hpp" #include "utils/general_utils.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { + using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; -bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MultiClassNms::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto nms = std::dynamic_pointer_cast(op); if (!nms) { @@ -41,8 +44,8 @@ bool 
MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache) - : MKLDNNNode(op, eng, cache) { +MultiClassNms::MultiClassNms(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr& cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -83,7 +86,7 @@ MKLDNNMultiClassNmsNode::MKLDNNMultiClassNmsNode(const std::shared_ptrgetMemory().getStaticDims(); const auto& scores_dims = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims(); if (!(boxes_dims[0] == scores_dims[0] && boxes_dims[1] == scores_dims[2])) { @@ -138,11 +141,11 @@ void MKLDNNMultiClassNmsNode::prepareParams() { m_numBoxOffset.resize(m_numBatches); } -bool MKLDNNMultiClassNmsNode::isExecutable() const { - return isDynamicNode() || MKLDNNNode::isExecutable(); +bool MultiClassNms::isExecutable() const { + return isDynamicNode() || Node::isExecutable(); } -void MKLDNNMultiClassNmsNode::executeDynamicImpl(mkldnn::stream strm) { +void MultiClassNms::executeDynamicImpl(mkldnn::stream strm) { if (hasEmptyInputTensors()) { redefineOutputMemory({{0, 6}, {0, 1}, {0}}); return; @@ -150,7 +153,7 @@ void MKLDNNMultiClassNmsNode::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { +void MultiClassNms::execute(mkldnn::stream strm) { const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); @@ -299,11 +302,11 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { } } -bool MKLDNNMultiClassNmsNode::created() const { - return getType() == MulticlassNms; +bool MultiClassNms::created() const { + return getType() == Type::MulticlassNms; } -float MKLDNNMultiClassNmsNode::intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized) { +float MultiClassNms::intersectionOverUnion(const float* boxesI, const float* boxesJ, const bool normalized) { float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; const float norm = static_cast(normalized == false); @@ -327,7 +330,7 @@ float MKLDNNMultiClassNmsNode::intersectionOverUnion(const float* boxesI, const return intersection_area / (areaI + areaJ - intersection_area); } -void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { +void MultiClassNms::nmsWithEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { auto less = [](const boxInfo& l, const boxInfo& r) { return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); }; @@ -393,7 +396,7 @@ void MKLDNNMultiClassNmsNode::nmsWithEta(const float* boxes, const float* scores }); } -void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { +void MultiClassNms::nmsWithoutEta(const float* boxes, const float* scores, const SizeVector& boxesStrides, const SizeVector& scoresStrides) { parallel_for2d(m_numBatches, m_numClasses, [&](int batch_idx, int class_idx) { if (class_idx != m_backgroundClass) { const float* boxesPtr = boxes + batch_idx * boxesStrides[0]; @@ -437,9 +440,11 @@ void MKLDNNMultiClassNmsNode::nmsWithoutEta(const float* boxes, const float* sco }); } -void 
MKLDNNMultiClassNmsNode::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { +void MultiClassNms::checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type) { if (std::find(precList.begin(), precList.end(), prec) == precList.end()) IE_THROW() << m_errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; } -REG_MKLDNN_PRIM_FOR(MKLDNNMultiClassNmsNode, MulticlassNms) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp index 08816bc40ea..061bc49b992 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.hpp @@ -11,6 +11,7 @@ namespace ov { namespace intel_cpu { +namespace node { enum class MulticlassNmsSortResultType { CLASSID, // sort selected boxes by class id (ascending) in each batch element @@ -18,9 +19,9 @@ enum class MulticlassNmsSortResultType { NONE // do not guarantee the order in each batch element }; -class MKLDNNMultiClassNmsNode : public MKLDNNNode { +class MultiClassNms : public Node { public: - MKLDNNMultiClassNmsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr& cache); + MultiClassNms(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr& cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -98,5 +99,6 @@ private: const InferenceEngine::SizeVector& scoresStrides); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index 3d525c3ae24..03182819614 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -10,7 +10,7 @@ #include "fake_quantize.h" #include "eltwise.h" -#include +#include #include "utils/bfloat16.hpp" #include "ie_parallel.hpp" #include "emitters/jit_load_store_emitters.hpp" @@ -27,7 +27,6 @@ #include "utils/cpu_utils.hpp" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; @@ -36,9 +35,13 @@ using namespace Xbyak; #define GET_OFF(field) offsetof(jit_mvn_call_args, field) +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct MVNKey { - MKLDNNMVNNode::MVNAttrs mvnAttrs; + MVN::MVNAttrs mvnAttrs; mkldnn::primitive_attr attr; size_t hash() const; @@ -657,7 +660,7 @@ private: }; ////////////////////////////////////////////////////////////////////////////////// -bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MVN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (op->get_output_partial_shape(0).rank().is_dynamic()) { errorMessage = "Unsupported dynamic input rank."; @@ -723,8 +726,8 @@ bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +MVN::MVN(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -750,9 +753,9 @@ MKLDNNMVNNode::MKLDNNMVNNode(const 
std::shared_ptr& op, const mkld mvnAttrs.execAcrossChannels_ = mvnAttrs.initAcrossChannels_; } -void MKLDNNMVNNode::getSupportedDescriptors() {} +void MVN::getSupportedDescriptors() {} -void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { +void MVN::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -832,12 +835,12 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { pushDesc(LayoutType::ncsp, impl_type); } -MKLDNNMVNNode::MVNExecutor::MVNExecutor(const MVNAttrs& mvnAttrs) +MVN::MVNExecutor::MVNExecutor(const MVNAttrs& mvnAttrs) : mvnAttrs(mvnAttrs), src_data_size(mvnAttrs.src_prc.size()), dst_data_size(mvnAttrs.dst_prc.size()) {} -MKLDNNMVNNode::MVNJitExecutor::MVNJitExecutor(const MVNAttrs& mvnAttrs, +MVN::MVNJitExecutor::MVNJitExecutor(const MVNAttrs& mvnAttrs, const mkldnn::primitive_attr& attr): MVNExecutor(mvnAttrs) { auto jcp = jit_mvn_config_params(); @@ -886,7 +889,7 @@ MKLDNNMVNNode::MVNJitExecutor::MVNJitExecutor(const MVNAttrs& mvnAttrs, mvn_variance_kernel->create_ker(); } -void MKLDNNMVNNode::MVNJitExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const void *post_ops_data_) { +void MVN::MVNJitExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const void *post_ops_data_) { if (!mvn_mean_kernel || (mvnAttrs.normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { IE_THROW() << "MVN layer doesn't create kernel to execute on sse41 above platform."; } @@ -897,13 +900,13 @@ void MKLDNNMVNNode::MVNJitExecutor::exec(const uint8_t *src_data, uint8_t *dst_d } } -MKLDNNMVNNode::MVNRefExecutor::MVNRefExecutor(const MVNAttrs& mvnAttrs):MVNExecutor(mvnAttrs) {} +MVN::MVNRefExecutor::MVNRefExecutor(const MVNAttrs& mvnAttrs):MVNExecutor(mvnAttrs) {} -void MKLDNNMVNNode::MVNRefExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const void *post_ops_data_) { +void MVN::MVNRefExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const void *post_ops_data_) { mvn_ref(src_data, dst_data); } -void MKLDNNMVNNode::prepareParams() { +void MVN::prepareParams() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -945,7 +948,7 @@ void MKLDNNMVNNode::prepareParams() { execPtr = result.first; } -void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) { +void MVN::transformTo5DCase(const SizeVector& shape) { switch (shape.size()) { // for 1 and 2 rank, if initAcrossChannels_ is true, adjust shape to fully vectorize under unified 5d procedure. // otherwise there are not enough data in spatial dimension to process in one kernel. 
@@ -974,20 +977,20 @@ void MKLDNNMVNNode::transformTo5DCase(const SizeVector& shape) { } } -void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { +void MVN::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { mkldnn::post_ops ops; VectorDims postOpDims(5); std::tie(postOpDims[0], postOpDims[1], postOpDims[2], postOpDims[3], postOpDims[4]) = mvnAttrs.shape5D; postOpsDataPtrs.clear(); for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); + auto* fakeQuantizeNode = dynamic_cast(node.get()); if (fakeQuantizeNode) { fakeQuantizeNode->appendPostOps(ops, {}, postOpsDataPtrs); continue; } - auto* eltwiseNode = dynamic_cast(node.get()); + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { eltwiseNode->appendPostOps(ops, postOpDims, postOpsDataPtrs); continue; @@ -997,11 +1000,11 @@ void MKLDNNMVNNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights) { attr.set_post_ops(ops); } -void MKLDNNMVNNode::executeDynamicImpl(mkldnn::stream strm) { +void MVN::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNMVNNode::execute(mkldnn::stream strm) { +void MVN::execute(mkldnn::stream strm) { if (!execPtr) { IE_THROW() << "Can't execute MVN node. Primitive was not created"; } @@ -1013,7 +1016,7 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { execPtr->exec(src_data, dst_data, postOpsDataPtrs.data()); } -void MKLDNNMVNNode::MVNJitExecutor::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const void *post_ops_data_) { +void MVN::MVNJitExecutor::mvn_pln(const uint8_t* src_data, uint8_t* dst_data, const void *post_ops_data_) { size_t blk_size = 1; // blk size in vmm if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; @@ -1153,7 +1156,7 @@ void MKLDNNMVNNode::MVNJitExecutor::mvn_pln(const uint8_t* src_data, uint8_t* ds } } -void MKLDNNMVNNode::MVNRefExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data) { +void MVN::MVNRefExecutor::mvn_ref(const uint8_t* src_data, uint8_t* dst_data) { const float *src_data_ptr = reinterpret_cast(src_data); float *dst_data_ptr = reinterpret_cast(dst_data); size_t N = 0; size_t C = 0; size_t D = 0; size_t H = 0; size_t W = 0; @@ -1251,7 +1254,7 @@ void MKLDNNMVNNode::MVNRefExecutor::mvn_ref(const uint8_t* src_data, uint8_t* ds } } -void MKLDNNMVNNode::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const void *post_ops_data_) { +void MVN::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const void *post_ops_data_) { size_t blk_size = 1; // channel blk for memory layout if (mayiuse(cpu::x64::avx512_common)) { blk_size = 16; @@ -1493,16 +1496,28 @@ void MKLDNNMVNNode::MVNJitExecutor::mvn_blk(const uint8_t* src_data, uint8_t* ds } } -bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { +bool MVN::canFuse(const NodePtr& node) const { if (!mayiuse(cpu::x64::sse41)) { return false; } // limit post ops to unary when shape transformed on channel // 1D only fused with unary int inputRank = getInputShapeAtPort(0).getRank(); - bool unaryEltwise = one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, - EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, - EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); + bool unaryEltwise = one_of(node->getAlgorithm(), Algorithm::EltwiseRelu, + Algorithm::EltwiseGelu, + Algorithm::EltwiseElu, + Algorithm::EltwiseSigmoid, + Algorithm::EltwiseClamp, + Algorithm::EltwiseTanh, +
Algorithm::EltwiseSwish, + Algorithm::EltwiseHswish, + Algorithm::EltwiseMish, + Algorithm::EltwiseHsigmoid, + Algorithm::EltwiseRoundHalfToEven, + Algorithm::EltwiseRoundHalfAwayFromZero, + Algorithm::EltwiseAbs, + Algorithm::EltwiseSqrt, + Algorithm::EltwiseSoftRelu); if ((inputRank == 1 && !unaryEltwise) || (inputRank == 2 && !unaryEltwise && mvnAttrs.initAcrossChannels_)) { return false; @@ -1511,8 +1526,10 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { return canFuseSimpleOperation(node); } -bool MKLDNNMVNNode::created() const { - return getType() == MVN; +bool MVN::created() const { + return getType() == Type::MVN; } -REG_MKLDNN_PRIM_FOR(MKLDNNMVNNode, MVN); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/mvn.h b/src/plugins/intel_cpu/src/nodes/mvn.h index f1068e6d453..965296224d9 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.h +++ b/src/plugins/intel_cpu/src/nodes/mvn.h @@ -12,6 +12,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_mvn_config_params { bool planar_layout; @@ -70,9 +71,9 @@ struct jit_uni_mvn_kernel { const mkldnn_primitive_attr &attr_; }; -class MKLDNNMVNNode : public MKLDNNNode { +class MVN : public Node { public: - MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MVN(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -92,7 +93,7 @@ public: return mvnAttrs.normalizeVariance_; } - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; void prepareParams() override; // Defines way to add epsilon: inside sqrt or outside. 
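The "inside sqrt or outside" comment refers to the two MVN epsilon modes: with variance normalization enabled the result is either (x - mean) / sqrt(variance + eps) or (x - mean) / (sqrt(variance) + eps). A scalar reference formula, for illustration only:

#include <cmath>

// Scalar form of the two epsilon modes; the node applies this element-wise over each normalization group.
float mvnNormalize(float x, float mean, float variance, float eps, bool epsInsideSqrt) {
    const float denom = epsInsideSqrt ? std::sqrt(variance + eps)   // epsilon inside the sqrt
                                      : std::sqrt(variance) + eps;  // epsilon outside the sqrt
    return (x - mean) / denom;
}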
@@ -168,5 +169,6 @@ private: }; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index 3458ce2223a..247ae52f83d 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -19,7 +19,6 @@ #include "emitters/jit_load_store_emitters.hpp" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; @@ -29,6 +28,10 @@ using namespace Xbyak; #define GET_OFF(field) offsetof(jit_nms_args, field) +namespace ov { +namespace intel_cpu { +namespace node { + template struct jit_uni_nms_kernel_f32 : public jit_uni_nms_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_nms_kernel_f32) @@ -546,7 +549,7 @@ private: } }; -bool MKLDNNNonMaxSuppressionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool NonMaxSuppression::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { // TODO [DS NMS]: remove when nodes from models where nms is not last node in model supports DS using NonMaxSuppressionV5 = ngraph::op::v5::NonMaxSuppression; @@ -569,8 +572,8 @@ bool MKLDNNNonMaxSuppressionNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache), isSoftSuppressedByIOU(true) { +NonMaxSuppression::NonMaxSuppression(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache), isSoftSuppressedByIOU(true) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -613,7 +616,7 @@ MKLDNNNonMaxSuppressionNode::MKLDNNNonMaxSuppressionNode(const std::shared_ptrgetMemory().getStaticDims() : getInputShapeAtPort(NMS_BOXES).getStaticDims(); const auto& scoresDims = isDynamicNode() ? 
getParentEdgesAtPort(NMS_SCORES)[0]->getMemory().getStaticDims() : @@ -689,11 +692,11 @@ void MKLDNNNonMaxSuppressionNode::prepareParams() { i.resize(numClasses); } -bool MKLDNNNonMaxSuppressionNode::isExecutable() const { - return isDynamicNode() || MKLDNNNode::isExecutable(); +bool NonMaxSuppression::isExecutable() const { + return isDynamicNode() || Node::isExecutable(); } -void MKLDNNNonMaxSuppressionNode::createJitKernel() { +void NonMaxSuppression::createJitKernel() { auto jcp = jit_nms_config_params(); jcp.box_encode_type = boxEncodingType; jcp.is_soft_suppressed_by_iou = isSoftSuppressedByIOU; @@ -710,7 +713,7 @@ void MKLDNNNonMaxSuppressionNode::createJitKernel() { nms_kernel->create_ker(); } -void MKLDNNNonMaxSuppressionNode::executeDynamicImpl(mkldnn::stream strm) { +void NonMaxSuppression::executeDynamicImpl(mkldnn::stream strm) { if (hasEmptyInputTensors() || (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS && reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0] == 0)) { redefineOutputMemory({{0, 3}, {0, 3}, {1}}); @@ -720,7 +723,7 @@ void MKLDNNNonMaxSuppressionNode::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { +void NonMaxSuppression::execute(mkldnn::stream strm) { const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); @@ -822,11 +825,11 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { *valid_outputs = static_cast(validOutputs); } -bool MKLDNNNonMaxSuppressionNode::created() const { - return getType() == NonMaxSuppression; +bool NonMaxSuppression::created() const { + return getType() == Type::NonMaxSuppression; } -float MKLDNNNonMaxSuppressionNode::intersectionOverUnion(const float *boxesI, const float *boxesJ) { +float NonMaxSuppression::intersectionOverUnion(const float *boxesI, const float *boxesJ) { float yminI, xminI, ymaxI, xmaxI, yminJ, xminJ, ymaxJ, xmaxJ; if (boxEncodingType == NMSBoxEncodeType::CENTER) { // box format: x_center, y_center, width, height @@ -861,7 +864,7 @@ float MKLDNNNonMaxSuppressionNode::intersectionOverUnion(const float *boxesI, co return intersection_area / (areaI + areaJ - intersection_area); } -void MKLDNNNonMaxSuppressionNode::nmsWithSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, +void NonMaxSuppression::nmsWithSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, const VectorDims &scoresStrides, std::vector &filtBoxes) { auto less = [](const boxInfo& l, const boxInfo& r) { return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); @@ -983,7 +986,7 @@ void MKLDNNNonMaxSuppressionNode::nmsWithSoftSigma(const float *boxes, const flo }); } -void MKLDNNNonMaxSuppressionNode::nmsWithoutSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, +void NonMaxSuppression::nmsWithoutSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, const VectorDims &scoresStrides, std::vector &filtBoxes) { int max_out_box = static_cast(maxOutputBoxesPerClass); parallel_for2d(numBatches, numClasses, [&](int batch_idx, int class_idx) { @@ -1070,13 +1073,13 @@ void MKLDNNNonMaxSuppressionNode::nmsWithoutSoftSigma(const float *boxes, const }); } -void MKLDNNNonMaxSuppressionNode::checkPrecision(const Precision& prec, const std::vector& precList, +void 
NonMaxSuppression::checkPrecision(const Precision& prec, const std::vector& precList, const std::string& name, const std::string& type) { if (std::find(precList.begin(), precList.end(), prec) == precList.end()) IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; } -void MKLDNNNonMaxSuppressionNode::check1DInput(const Shape& shape, const std::vector& precList, +void NonMaxSuppression::check1DInput(const Shape& shape, const std::vector& precList, const std::string& name, const size_t port) { checkPrecision(getOriginalInputPrecisionAtPort(port), precList, name, inType); @@ -1087,7 +1090,7 @@ void MKLDNNNonMaxSuppressionNode::check1DInput(const Shape& shape, const std::ve IE_THROW() << errorPrefix << "has unsupported '" << name << "' input 1st dimension size: " << MemoryDescUtils::dim2str(shape.getDims()[0]); } -void MKLDNNNonMaxSuppressionNode::checkOutput(const Shape& shape, const std::vector& precList, +void NonMaxSuppression::checkOutput(const Shape& shape, const std::vector& precList, const std::string& name, const size_t port) { checkPrecision(getOriginalOutputPrecisionAtPort(port), precList, name, outType); @@ -1097,5 +1100,6 @@ void MKLDNNNonMaxSuppressionNode::checkOutput(const Shape& shape, const std::vec IE_THROW() << errorPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << MemoryDescUtils::dim2str(shape.getDims()[1]); } - -REG_MKLDNN_PRIM_FOR(MKLDNNNonMaxSuppressionNode, NonMaxSuppression) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.h b/src/plugins/intel_cpu/src/nodes/non_max_suppression.h index b9d8e282ac2..5ef7916e28a 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.h +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.h @@ -16,6 +16,7 @@ using namespace InferenceEngine; namespace ov { namespace intel_cpu { +namespace node { enum class NMSBoxEncodeType { CORNER, @@ -61,9 +62,9 @@ struct jit_uni_nms_kernel { jit_nms_config_params jcp; }; -class MKLDNNNonMaxSuppressionNode : public MKLDNNNode { +class NonMaxSuppression : public Node { public: - MKLDNNNonMaxSuppressionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + NonMaxSuppression(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -148,5 +149,6 @@ private: std::shared_ptr nms_kernel; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp index 6e4291c4774..c3288166e51 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp @@ -6,10 +6,13 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNNonZeroNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool NonZero::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (op->get_type_info() != ngraph::op::v3::NonZero::get_type_info_static()) { errorMessage = "Node is not an instance of NonZero from the operation set v3."; @@ -21,8 +24,8 @@ bool MKLDNNNonZeroNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : 
MKLDNNNode(op, eng, cache) { +NonZero::NonZero(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "NonZero layer with name '" + getName() + "' "; @@ -34,7 +37,7 @@ MKLDNNNonZeroNode::MKLDNNNonZeroNode(const std::shared_ptr& op, co } } -void MKLDNNNonZeroNode::getSupportedDescriptors() { +void NonZero::getSupportedDescriptors() { if (!descs.empty()) return; if (getParentEdges().size() != 1) @@ -43,7 +46,7 @@ void MKLDNNNonZeroNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); } -void MKLDNNNonZeroNode::initSupportedPrimitiveDescriptors() { +void NonZero::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -59,7 +62,7 @@ void MKLDNNNonZeroNode::initSupportedPrimitiveDescriptors() { } template -size_t MKLDNNNonZeroNode::getNonZeroElementsCount(const T* src, const Shape& inShape) { +size_t NonZero::getNonZeroElementsCount(const T* src, const Shape& inShape) { T zero = 0; size_t count = 0; size_t inSize = inShape.getElementsCount(); @@ -76,21 +79,21 @@ size_t MKLDNNNonZeroNode::getNonZeroElementsCount(const T* src, const Shape& inS } namespace { struct NonZeroContext { - MKLDNNNonZeroNode &node; + NonZero &node; }; } template -struct MKLDNNNonZeroNode::NonZeroExecute { +struct NonZero::NonZeroExecute { void operator()(NonZeroContext & ctx) { ctx.node.executeSpecified(); } }; -void MKLDNNNonZeroNode::executeDynamicImpl(mkldnn::stream strm) { +void NonZero::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNNonZeroNode::execute(mkldnn::stream strm) { +void NonZero::execute(mkldnn::stream strm) { auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); NonZeroContext ctx = {*this }; OV_SWITCH(intel_cpu, NonZeroExecute, ctx, inputPrec, @@ -102,7 +105,7 @@ void MKLDNNNonZeroNode::execute(mkldnn::stream strm) { OV_CASE(Precision::U8, uint8_t)) } template -void MKLDNNNonZeroNode::executeSpecified() { +void NonZero::executeSpecified() { T zero = 0; T *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); @@ -137,8 +140,10 @@ void MKLDNNNonZeroNode::executeSpecified() { } } -bool MKLDNNNonZeroNode::created() const { - return getType() == NonZero; +bool NonZero::created() const { + return getType() == Type::NonZero; } -REG_MKLDNN_PRIM_FOR(MKLDNNNonZeroNode, NonZero) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.h b/src/plugins/intel_cpu/src/nodes/non_zero.h index 16bd0e7cfcc..ebdc57fc1d3 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.h +++ b/src/plugins/intel_cpu/src/nodes/non_zero.h @@ -8,14 +8,15 @@ #include #include #include -#include +#include namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNNonZeroNode : public MKLDNNNode { +class NonZero : public Node { public: - MKLDNNNonZeroNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + NonZero(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -38,5 +39,6 @@ private: size_t getNonZeroElementsCount(const T* arg, const Shape& arg_shape); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git 
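For the NonZero node above, getNonZeroElementsCount sizes the output as {input rank, non-zero count} and executeSpecified fills it with coordinates, one output row per input dimension. A small standalone illustration of that layout (not the node's kernel):

#include <cstdio>
#include <vector>

int main() {
    const int rows = 2, cols = 3;
    const int in[2][3] = {{0, 1, 0}, {2, 0, 3}};
    std::vector<int> coord0, coord1;  // one output row per input dimension
    for (int r = 0; r < rows; ++r)
        for (int c = 0; c < cols; ++c)
            if (in[r][c] != 0) { coord0.push_back(r); coord1.push_back(c); }
    // Three non-zero elements, so the output shape is {2, 3}:
    //   row 0 (first coordinate):  0 1 1
    //   row 1 (second coordinate): 1 0 2
    std::printf("output shape: {2, %zu}\n", coord0.size());
    return 0;
}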
a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 20c5808e91d..240d187af99 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -10,7 +10,7 @@ #include "eltwise.h" #include "utils/bfloat16.hpp" #include "utils/general_utils.h" -#include +#include #include "emitters/jit_bf16_emitters.hpp" #include #include @@ -25,7 +25,6 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; @@ -36,9 +35,13 @@ using namespace Xbyak; #define THROW_ERROR IE_THROW() << "NormalizeL2 layer with name '" << getName() << "' " +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct NormalizeKey { - MKLDNNNormalizeL2Node::NormalizeL2Attrs attrs; + NormalizeL2::NormalizeL2Attrs attrs; mkldnn::primitive_attr kernel_attrs; VectorDims dims; @@ -693,7 +696,7 @@ private: } }; -bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool NormalizeL2::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto norm = ov::as_type_ptr(op); if (!norm) { @@ -751,8 +754,8 @@ bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +NormalizeL2::NormalizeL2(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -774,7 +777,7 @@ MKLDNNNormalizeL2Node::MKLDNNNormalizeL2Node(const std::shared_ptr attrs.cornerCase = ngraph::shape_size(op->get_input_shape(AXES)) == 0; } -void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { +void NormalizeL2::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -843,22 +846,22 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { pushDesc(LayoutType::ncsp, impl_type); } -bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { +bool NormalizeL2::canFuse(const NodePtr& node) const { return !attrs.cornerCase && canFuseSimpleOperation(node); } -void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr& kernel_attrs, const VectorDims& dims, bool initWeights) { +void NormalizeL2::setPostOps(mkldnn::primitive_attr& kernel_attrs, const VectorDims& dims, bool initWeights) { mkldnn::post_ops ops; postOpsDataPtrs.clear(); for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); + auto* fakeQuantizeNode = dynamic_cast(node.get()); if (fakeQuantizeNode) { fakeQuantizeNode->appendPostOps(ops, {}, postOpsDataPtrs); continue; } - auto* eltwiseNode = dynamic_cast(node.get()); + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { eltwiseNode->appendPostOps(ops, dims, postOpsDataPtrs); continue; @@ -870,7 +873,7 @@ void MKLDNNNormalizeL2Node::setPostOps(mkldnn::primitive_attr& kernel_attrs, con kernel_attrs.set_post_ops(ops); } -void MKLDNNNormalizeL2Node::createPrimitive() { +void NormalizeL2::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -901,11 +904,11 @@ void MKLDNNNormalizeL2Node::createPrimitive() { } } -bool 
MKLDNNNormalizeL2Node::isExecutable() const { +bool NormalizeL2::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -void MKLDNNNormalizeL2Node::prepareParams() { +void NormalizeL2::prepareParams() { const auto& dims = getParentEdgeAt(DATA)->getMemoryPtr()->getStaticDims(); setPostOps(kernel_attrs, dims, true); @@ -913,7 +916,7 @@ void MKLDNNNormalizeL2Node::prepareParams() { NormalizeKey key = {attrs, kernel_attrs, dims}; auto engine = getEngine(); - auto builder = [&engine](const NormalizeKey& key) -> std::shared_ptr { + auto builder = [&engine](const NormalizeKey& key) -> std::shared_ptr { return NormalizeL2Executor::getNormalizeL2Executor(key.attrs, key.kernel_attrs, key.dims); }; @@ -927,11 +930,11 @@ void MKLDNNNormalizeL2Node::prepareParams() { execPtr = result.first; } -void MKLDNNNormalizeL2Node::executeDynamicImpl(mkldnn::stream strm) { +void NormalizeL2::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { +void NormalizeL2::execute(mkldnn::stream strm) { if (!execPtr) THROW_ERROR << "doesn't have a compiled executor."; @@ -940,14 +943,14 @@ void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { execPtr->exec(src_ptr, dst_ptr, postOpsDataPtrs.data()); } -std::vector MKLDNNNormalizeL2Node::shapeInfer() const { +std::vector NormalizeL2::shapeInfer() const { return std::vector{getParentEdgesAtPort(DATA)[0]->getMemory().getStaticDims()}; } // *====================* CornerCase *===================* template -class MKLDNNNormalizeL2Node::NormalizeL2CornerCaseExecutor : public MKLDNNNormalizeL2Node::NormalizeL2Executor { +class NormalizeL2::NormalizeL2CornerCaseExecutor : public NormalizeL2::NormalizeL2Executor { public: NormalizeL2CornerCaseExecutor(const VectorDims& dims) { workAmount = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); @@ -971,7 +974,7 @@ private: // *=================* JIT case *=================* template -class MKLDNNNormalizeL2Node::NormalizeL2JitExecutor : public MKLDNNNormalizeL2Node::NormalizeL2Executor { +class NormalizeL2::NormalizeL2JitExecutor : public NormalizeL2::NormalizeL2Executor { public: NormalizeL2JitExecutor(const NormalizeL2Attrs& attrs_, const mkldnn::primitive_attr& kernel_attrs, @@ -982,8 +985,8 @@ public: IE_THROW() << "Normalaize2L executor has selected layout which is not supported"; } - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(attrs.input_prec); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(attrs.output_prec); + jcp.src_dt = DnnlExtensionUtils::IEPrecisionToDataType(attrs.input_prec); + jcp.dst_dt = DnnlExtensionUtils::IEPrecisionToDataType(attrs.output_prec); jcp.src_data_size = attrs.input_prec.size(); jcp.dst_data_size = attrs.output_prec.size(); jcp.across_spatial = attrs.across_spatial; @@ -1309,7 +1312,7 @@ private: // *=============* Reference case *===============* template -class MKLDNNNormalizeL2Node::NormalizeL2ReferenceExecutor : public MKLDNNNormalizeL2Node::NormalizeL2Executor { +class NormalizeL2::NormalizeL2ReferenceExecutor : public NormalizeL2::NormalizeL2Executor { public: NormalizeL2ReferenceExecutor(const NormalizeL2Attrs& attrs, const mkldnn::primitive_attr& kernel_attrs, const VectorDims& dims) : attrs(attrs), kernel_attrs(kernel_attrs), dims(dims) { @@ -1476,7 +1479,7 @@ private: // *=================* *======* *=================* -std::shared_ptr MKLDNNNormalizeL2Node::NormalizeL2Executor::getNormalizeL2Executor( +std::shared_ptr NormalizeL2::NormalizeL2Executor::getNormalizeL2Executor( const 
NormalizeL2Attrs& attrs, const mkldnn::primitive_attr& kernel_attrs, const VectorDims& dims) { NormalizeContext ctx = { nullptr, attrs, kernel_attrs, dims }; @@ -1496,7 +1499,7 @@ std::shared_ptr MKLDNNNormalizeL2Nod } template -std::shared_ptr MKLDNNNormalizeL2Node::NormalizeL2Executor::makeExecutor( +std::shared_ptr NormalizeL2::NormalizeL2Executor::makeExecutor( const NormalizeL2Attrs& attrs, const mkldnn::primitive_attr& kernel_attrs, const VectorDims& dims) { if (attrs.cornerCase) return std::make_shared>(dims); @@ -1508,8 +1511,10 @@ std::shared_ptr MKLDNNNormalizeL2Nod IE_THROW() << "'NormalizeL2' cannot create Executor"; } -bool MKLDNNNormalizeL2Node::created() const { - return getType() == NormalizeL2; +bool NormalizeL2::created() const { + return getType() == Type::NormalizeL2; } -REG_MKLDNN_PRIM_FOR(MKLDNNNormalizeL2Node, NormalizeL2); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/normalize.h b/src/plugins/intel_cpu/src/nodes/normalize.h index 0ad36f7695d..95a8e235474 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.h +++ b/src/plugins/intel_cpu/src/nodes/normalize.h @@ -14,10 +14,9 @@ #include "utils/cpu_utils.hpp" #include "ie_parallel.hpp" -using namespace InferenceEngine; - namespace ov { namespace intel_cpu { +namespace node { struct jit_normalize_config_params { bool is_nchw; @@ -77,9 +76,9 @@ struct jit_uni_normalize_kernel { const mkldnn_primitive_attr &attr_; }; -class MKLDNNNormalizeL2Node : public MKLDNNNode { +class NormalizeL2 : public Node { public: - MKLDNNNormalizeL2Node(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + NormalizeL2(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override {}; @@ -90,7 +89,7 @@ public: bool canBeInPlace() const override { return false; } - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; std::vector shapeInfer() const override; void prepareParams() override; @@ -110,8 +109,8 @@ public: bool cornerCase = false; float eps = 1e-10f; - InferenceEngine::Precision input_prec = Precision::UNSPECIFIED; - InferenceEngine::Precision output_prec = Precision::UNSPECIFIED; + InferenceEngine::Precision input_prec = InferenceEngine::Precision::UNSPECIFIED; + InferenceEngine::Precision output_prec = InferenceEngine::Precision::UNSPECIFIED; size_t src_data_size = 0lu; size_t dst_data_size = 0lu; }; @@ -175,5 +174,6 @@ private: executorPtr execPtr = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.cpp b/src/plugins/intel_cpu/src/nodes/one_hot.cpp index 1d3fdaf4d91..483c6c95d4e 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.cpp +++ b/src/plugins/intel_cpu/src/nodes/one_hot.cpp @@ -15,10 +15,13 @@ #include #include "common/cpu_memcpy.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNOneHotNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool OneHot::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto oneHot = std::dynamic_pointer_cast(op); if (!oneHot) { @@ -39,8 +42,8 @@ bool MKLDNNOneHotNode::isSupportedOperation(const std::shared_ptr& op, const 
mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +OneHot::OneHot(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -76,14 +79,14 @@ MKLDNNOneHotNode::MKLDNNOneHotNode(const std::shared_ptr& op, cons IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; } -bool MKLDNNOneHotNode::needShapeInfer() const { +bool OneHot::needShapeInfer() const { const auto depthNodePtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr()); if (depth != depthNodePtr[0]) return true; - return MKLDNNNode::needShapeInfer(); + return Node::needShapeInfer(); } -std::vector MKLDNNOneHotNode::shapeInfer() const { +std::vector OneHot::shapeInfer() const { depth = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr())[0]; auto result = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); @@ -92,7 +95,7 @@ std::vector MKLDNNOneHotNode::shapeInfer() const { return { result }; } -void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() { +void OneHot::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -112,7 +115,7 @@ void MKLDNNOneHotNode::initSupportedPrimitiveDescriptors() { } template -void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { +void OneHot::one_hot(size_t prefix_size, size_t suffix_size) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -137,11 +140,11 @@ void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { }); } -void MKLDNNOneHotNode::executeDynamicImpl(mkldnn::stream strm) { +void OneHot::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNOneHotNode::execute(mkldnn::stream strm) { +void OneHot::execute(mkldnn::stream strm) { std::size_t prefix_size = 1; auto input_dims = getParentEdgeAt(0)->getMemory().getStaticDims(); @@ -158,8 +161,10 @@ void MKLDNNOneHotNode::execute(mkldnn::stream strm) { OV_CASE(sizeof(uint8_t), uint8_t)) } -bool MKLDNNOneHotNode::created() const { - return getType() == OneHot; +bool OneHot::created() const { + return getType() == Type::OneHot; } -REG_MKLDNN_PRIM_FOR(MKLDNNOneHotNode, OneHot) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.h b/src/plugins/intel_cpu/src/nodes/one_hot.h index b2e32dd2d7b..76fdf1e5dc3 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.h +++ b/src/plugins/intel_cpu/src/nodes/one_hot.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNOneHotNode : public MKLDNNNode { +class OneHot : public Node { public: - MKLDNNOneHotNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + OneHot(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -35,7 +36,7 @@ private: typedef InferenceEngine::PrecisionTrait::value_type in_type; struct OneHotContext { - MKLDNNOneHotNode* nodePtr; + OneHot* nodePtr; size_t prefix_size; size_t suffix_size; }; @@ -63,5 +64,6 @@ private: void one_hot(size_t prefix_size, size_t suffix_size); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git 
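The one_hot(prefix_size, suffix_size) split used by the OneHot node above treats the input as a [prefix][suffix] block and inserts the depth axis between the two, so the output is laid out as [prefix][depth][suffix]. A sketch of that expansion with a hypothetical standalone helper (not the node's template kernel):

#include <cstddef>
#include <vector>

std::vector<int> oneHot(const std::vector<int>& indices, std::size_t prefix, std::size_t suffix,
                        int depth, int onValue, int offValue) {
    // indices is the flattened input of size prefix * suffix; output index is (p * depth + d) * suffix + s.
    std::vector<int> out(prefix * depth * suffix, offValue);
    for (std::size_t p = 0; p < prefix; ++p)
        for (std::size_t s = 0; s < suffix; ++s) {
            const int idx = indices[p * suffix + s];
            if (idx >= 0 && idx < depth)
                out[(p * depth + static_cast<std::size_t>(idx)) * suffix + s] = onValue;
        }
    return out;
}
// Example: indices shaped {2, 3} with axis = 1 and depth = 4 give prefix = 2, suffix = 3
// and an output shaped {2, 4, 3}.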
a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index 0c358724ca2..7bd45cc1653 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" @@ -15,12 +15,15 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; #define THROW_ERROR IE_THROW() << "Pad layer with name '" << getName() << "' " -bool MKLDNNPadNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Pad::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto pad = ov::as_type_ptr(op); if (!pad) { @@ -29,7 +32,7 @@ bool MKLDNNPadNode::isSupportedOperation(const std::shared_ptrget_pad_mode(); - if (!ov::intel_cpu::one_of(pad_mode, ngraph::op::PadMode::CONSTANT, ngraph::op::PadMode::EDGE, ngraph::op::PadMode::REFLECT, + if (!one_of(pad_mode, ngraph::op::PadMode::CONSTANT, ngraph::op::PadMode::EDGE, ngraph::op::PadMode::REFLECT, ngraph::op::PadMode::SYMMETRIC)) { errorMessage = "Has unsupported pad_mode: " + ngraph::as_string(pad_mode); return false; @@ -57,8 +60,8 @@ bool MKLDNNPadNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Pad::Pad(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -113,9 +116,9 @@ MKLDNNPadNode::MKLDNNPadNode(const std::shared_ptr& op, const mkld } } -void MKLDNNPadNode::getSupportedDescriptors() {} +void Pad::getSupportedDescriptors() {} -void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { +void Pad::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -166,7 +169,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNPadNode::createPrimitive() { +void Pad::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -227,17 +230,17 @@ void MKLDNNPadNode::createPrimitive() { } } -bool MKLDNNPadNode::isExecutable() const { +bool Pad::isExecutable() const { return !isOutputTensorAtPortEmpty(0); } -void MKLDNNPadNode::prepareParams() { +void Pad::prepareParams() { execPtr = std::make_shared(attrs, getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(), getChildEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims()); } -MKLDNNPadNode::PadExecutor::PadExecutor(const PadAttrs& attrs, +Pad::PadExecutor::PadExecutor(const PadAttrs& attrs, const VectorDims& srcDims, const VectorDims& dstDims) { params.attrs = attrs; @@ -299,7 +302,7 @@ MKLDNNPadNode::PadExecutor::PadExecutor(const PadAttrs& attrs, } } -void MKLDNNPadNode::PadExecutor::exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void Pad::PadExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { if (zeroInputDimsCase) { padConstant(srcMemPtr, dstMemPtr); } else { @@ -320,19 +323,19 @@ void MKLDNNPadNode::PadExecutor::exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPt } } -void MKLDNNPadNode::execute(mkldnn::stream strm) { +void Pad::execute(mkldnn::stream strm) { if (!execPtr) THROW_ERROR 
<< "has not compiled executor."; execPtr->exec(getParentEdgeAt(0)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr()); } -void MKLDNNPadNode::executeDynamicImpl(mkldnn::stream strm) { +void Pad::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -std::vector MKLDNNPadNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(PADS_BEGIN_ID, PADS_END_ID)); +std::vector Pad::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(PADS_BEGIN_ID, PADS_END_ID)); } static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims& dims, VectorDims& indexes) { @@ -353,7 +356,7 @@ static inline void parallel_step(size_t nDims, const VectorDims& dims, VectorDim } } -void MKLDNNPadNode::PadExecutor::padConstant(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { if (params.attrs.padValue == 0 && !zeroInputDimsCase) { padConstantZero(srcMemPtr, dstMemPtr); return; @@ -369,7 +372,7 @@ void MKLDNNPadNode::PadExecutor::padConstant(MKLDNNMemoryPtr& srcMemPtr, MKLDNNM } template -void MKLDNNPadNode::PadExecutor::padConstantCommon(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { T* dstData = reinterpret_cast(dstMemPtr->GetPtr()); const T value = static_cast(params.attrs.padValue); if (zeroInputDimsCase) { @@ -421,7 +424,7 @@ void MKLDNNPadNode::PadExecutor::padConstantCommon(MKLDNNMemoryPtr& srcMemPtr, M }); } -void MKLDNNPadNode::PadExecutor::padConstantZero(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -466,7 +469,7 @@ void MKLDNNPadNode::PadExecutor::padConstantZero(MKLDNNMemoryPtr& srcMemPtr, MKL }); } -void MKLDNNPadNode::PadExecutor::padEdge(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -506,7 +509,7 @@ void MKLDNNPadNode::PadExecutor::padEdge(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemor }); } -void MKLDNNPadNode::PadExecutor::padReflectOrSymmetric(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const bool isSymmetric) { +void Pad::PadExecutor::padReflectOrSymmetric(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const bool isSymmetric) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); size_t shift = isSymmetric ? 
1 : 0; @@ -548,12 +551,15 @@ void MKLDNNPadNode::PadExecutor::padReflectOrSymmetric(MKLDNNMemoryPtr& srcMemPt }); } -inline void MKLDNNPadNode::PadExecutor::getDstIdx(const VectorDims& indexes, size_t& dstIdx) const { +inline void Pad::PadExecutor::getDstIdx(const VectorDims& indexes, size_t& dstIdx) const { for (size_t i = 0; i < params.nDimsForWork; ++i) dstIdx += indexes[i] * params.dstStrides[i]; } -bool MKLDNNPadNode::created() const { - return getType() == Pad; +bool Pad::created() const { + return getType() == Type::Pad; } -REG_MKLDNN_PRIM_FOR(MKLDNNPadNode, Pad); + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/pad.h b/src/plugins/intel_cpu/src/nodes/pad.h index 47ff506fd3c..3766a1d0e60 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.h +++ b/src/plugins/intel_cpu/src/nodes/pad.h @@ -10,10 +10,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNPadNode : public MKLDNNNode { +class Pad : public Node { public: - MKLDNNPadNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Pad(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -50,22 +51,22 @@ private: struct PadExecutor { PadExecutor(const PadAttrs& params, const VectorDims& srcDims, const VectorDims& dstDims); - void exec(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr); + void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); ~PadExecutor() = default; private: - void padConstant(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr); - template void padConstantCommon(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr); - void padConstantZero(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr); - void padEdge(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr); - void padReflectOrSymmetric(MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const bool isSymmetric = false); + void padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); + template void padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); + void padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); + void padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); + void padReflectOrSymmetric(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const bool isSymmetric = false); inline void getDstIdx(const VectorDims& indexes, size_t& dstIdx) const; struct PadContext { PadExecutor* executor; - MKLDNNMemoryPtr srcMemPtr; - MKLDNNMemoryPtr dstMemPtr; + MemoryPtr srcMemPtr; + MemoryPtr dstMemPtr; }; template @@ -106,5 +107,6 @@ private: executorPtr execPtr = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 0d76d00861a..a7edeae0cd2 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -11,23 +11,26 @@ #include #include #include -#include +#include #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct PoolingKey { DnnlMemoryDescCPtr inp; DnnlMemoryDescCPtr out; std::vector stride; std::vector kernel; - /// Effective padding. 
Used to define correct output shape by MKLDNN + /// Effective padding. Used to define correct output shape by oneDNN /// reshape formula: (iw - kernel + pad_l + pad_r) / strides[i - 2] + 1 /// should be passed into pooling desc constructor. std::vector effective_pad_begin; @@ -120,7 +123,7 @@ std::shared_ptr createDescriptorHelper(const mkldnn::m } // namespace -bool MKLDNNPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Pooling::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (ov::is_type(op)) { if (!op->get_output_target_inputs(1).empty()) { @@ -137,8 +140,8 @@ bool MKLDNNPoolingNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Pooling::Pooling(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -152,7 +155,7 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, const if (auto maxPoolOp_v8 = ov::as_type_ptr(op)) { isMaxPool8 = true; - algorithm = PoolingMax; + algorithm = Algorithm::PoolingMax; exclude_pad = false; get_attributes(dilation, maxPoolOp_v8->get_dilations()); @@ -163,7 +166,7 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, const auto_pad = (maxPoolOp_v8->get_auto_pad() == ov::op::PadType::SAME_LOWER || maxPoolOp_v8->get_auto_pad() == ov::op::PadType::SAME_UPPER); } else if (auto maxPoolOp_v1 = ov::as_type_ptr(op)) { - algorithm = PoolingMax; + algorithm = Algorithm::PoolingMax; exclude_pad = false; get_attributes(stride, maxPoolOp_v1->get_strides()); @@ -174,7 +177,7 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, const auto_pad = (maxPoolOp_v1->get_auto_pad() == ov::op::PadType::SAME_LOWER || maxPoolOp_v1->get_auto_pad() == ov::op::PadType::SAME_UPPER); } else if (auto avgPoolOp = ov::as_type_ptr(op)) { - algorithm = PoolingAvg; + algorithm = Algorithm::PoolingAvg; exclude_pad = avgPoolOp->get_exclude_pad(); get_attributes(stride, avgPoolOp->get_strides()); @@ -187,7 +190,7 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, const } } -std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(const Shape &dims) const { +std::vector Pooling::getAvailableFormatsForDims(const Shape &dims) const { if (dims.getRank() == 0) return {memory::format_tag::x}; else if (dims.getRank() == 1) @@ -203,7 +206,7 @@ std::vector MKLDNNPoolingNode::getAvailableFormatsForDims(co return {memory::format_tag::any}; } -void MKLDNNPoolingNode::initEffectiveAttributes(const Shape &inShape, const Shape &outShape) { +void Pooling::initEffectiveAttributes(const Shape &inShape, const Shape &outShape) { effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); effective_dilation.resize(dilation.size(), 0); @@ -223,7 +226,7 @@ void MKLDNNPoolingNode::initEffectiveAttributes(const Shape &inShape, const Shap } } -void MKLDNNPoolingNode::getSupportedDescriptors() { +void Pooling::getSupportedDescriptors() { if (!descs.empty()) return; @@ -238,10 +241,10 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { // WA: LPT transformation has WA which allows average pooling has I8/U8 output precision instead of FP32, // so we explicitly set output precision as FP32 if (outputPrecision != Precision::I8 && inputPrecision != Precision::BF16) { - if 
(getAlgorithm() == PoolingMax) { - // MKLDNN supports only equal precisions for input and output + if (getAlgorithm() == Algorithm::PoolingMax) { + // oneDNN supports only equal precisions for input and output outputPrecision = inputPrecision; - } else if (getAlgorithm() == PoolingAvg) { + } else if (getAlgorithm() == Algorithm::PoolingAvg) { outputPrecision = Precision::FP32; } } @@ -253,8 +256,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { outputPrecision = fusedWith.back()->getOriginalOutputPrecisionAtPort(0); } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(inputPrecision); + auto outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(outputPrecision); const auto &parentShape = getInputShapeAtPort(0); const auto &childShape = getOutputShapeAtPort(0); @@ -311,7 +314,7 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { } } -void MKLDNNPoolingNode::prepareParams() { +void Pooling::prepareParams() { const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Pooling node with name '" << getName() << "' did not set preferable primitive descriptor"; @@ -360,7 +363,7 @@ void MKLDNNPoolingNode::prepareParams() { key.effective_pad_end, key.effective_dilation, key.data_pad_end); - MKLDNNDescriptor desc{desc_ptr}; + DnnlDesriptor desc{desc_ptr}; pooling_v2_forward::primitive_desc prim_desc; primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine, key.attr); while (static_cast(itpd)) { @@ -389,19 +392,19 @@ void MKLDNNPoolingNode::prepareParams() { auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}}; - MKLDNNNode::appendPostOpArgs(*attr, primArgs, postOpsArgs); + Node::appendPostOpArgs(*attr, primArgs, postOpsArgs); } -void MKLDNNPoolingNode::executeDynamicImpl(mkldnn::stream strm) { +void Pooling::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNPoolingNode::created() const { - return getType() == Pooling; +bool Pooling::created() const { + return getType() == Type::Pooling; } -mkldnn::algorithm MKLDNNPoolingNode::getPoolingAlgorithm() const { - if (algorithm == PoolingAvg) { +mkldnn::algorithm Pooling::getPoolingAlgorithm() const { + if (algorithm == Algorithm::PoolingAvg) { bool not_zero_l = false; for (auto lr : data_pad_begin) { if (lr) { @@ -420,14 +423,14 @@ mkldnn::algorithm MKLDNNPoolingNode::getPoolingAlgorithm() const { return mkldnn::algorithm::pooling_avg_include_padding; else return mkldnn::algorithm::pooling_avg_exclude_padding; - } else if (algorithm == PoolingMax) { + } else if (algorithm == Algorithm::PoolingMax) { return mkldnn::algorithm::pooling_max; } else { return mkldnn::algorithm::undef; } } -std::shared_ptr MKLDNNPoolingNode::createDescriptorInternal( +std::shared_ptr Pooling::createDescriptorInternal( const mkldnn::memory::desc& in_candidate, const mkldnn::memory::desc& out_candidate, const mkldnn::algorithm alg) const { @@ -442,7 +445,7 @@ std::shared_ptr MKLDNNPoolingNode::createDescriptorInt data_pad_end); } -void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, +void Pooling::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { auto inDesc = inputDesc[0]->isDefined() ? 
inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims()); auto dnnlInDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inDesc); @@ -465,7 +468,7 @@ void MKLDNNPoolingNode::createDescriptor(const std::vector &input descs.emplace_back(desc_ptr); } -void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { +void Pooling::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -516,7 +519,7 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { +void Pooling::initDescriptor(const NodeConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { return; @@ -609,7 +612,7 @@ void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { selectedPD->setConfig(rightConfig); } -MKLDNNNode::AttrPtr MKLDNNPoolingNode::initPrimitiveAttr() { +Node::AttrPtr Pooling::initPrimitiveAttr() { auto attr = std::make_shared(mkldnn::primitive_attr()); setPostOps(*attr); @@ -617,11 +620,11 @@ MKLDNNNode::AttrPtr MKLDNNPoolingNode::initPrimitiveAttr() { return attr; } -void MKLDNNPoolingNode::setPostOps(mkldnn::primitive_attr &attr) { +void Pooling::setPostOps(mkldnn::primitive_attr &attr) { mkldnn::post_ops ops; for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); + auto* fakeQuantizeNode = dynamic_cast(node.get()); if (fakeQuantizeNode) { fakeQuantizeNode->appendPostOps(ops, {}, postOpsArgs); continue; @@ -633,4 +636,6 @@ void MKLDNNPoolingNode::setPostOps(mkldnn::primitive_attr &attr) { attr.set_post_ops(ops); } -REG_MKLDNN_PRIM_FOR(MKLDNNPoolingNode, Pooling); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/pooling.h b/src/plugins/intel_cpu/src/nodes/pooling.h index 983ed713376..4d63abd7ee6 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.h +++ b/src/plugins/intel_cpu/src/nodes/pooling.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNPoolingNode : public MKLDNNNode { +class Pooling : public Node { public: - MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Pooling(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; @@ -56,7 +57,7 @@ private: std::vector stride; std::vector kernel; - /// Effective padding. Used to define correct output shape by MKLDNN + /// Effective padding. Used to define correct output shape by oneDNN /// reshape formula: (iw - kernel + pad_l + pad_r) / strides[i - 2] + 1 /// should be passed into pooling desc constructor. std::vector effective_pad_begin; @@ -74,5 +75,6 @@ private: std::vector data_pad_end; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.cpp b/src/plugins/intel_cpu/src/nodes/priorbox.cpp index d497716c605..31c0bb2dedb 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox.cpp @@ -14,12 +14,15 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; #define THROW_ERROR IE_THROW() << "PriorBox layer with name '" << getName() << "': " +namespace ov { +namespace intel_cpu { +namespace node { namespace { + float clip_great(float x, float threshold) { return x < threshold ? 
x : threshold; } @@ -27,9 +30,10 @@ float clip_great(float x, float threshold) { float clip_less(float x, float threshold) { return x > threshold ? x : threshold; } -} -bool MKLDNNPriorBoxNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +} // namespace + +bool PriorBox::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto priorBox = std::dynamic_pointer_cast(op); if (!priorBox) { @@ -42,10 +46,10 @@ bool MKLDNNPriorBoxNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -107,7 +111,7 @@ MKLDNNPriorBoxNode::MKLDNNPriorBoxNode( } } -bool MKLDNNPriorBoxNode::needShapeInfer() const { +bool PriorBox::needShapeInfer() const { auto& memory = getChildEdgeAt(0)->getMemoryPtr(); if (memory->GetShape().isDynamic()) { return true; @@ -122,7 +126,7 @@ bool MKLDNNPriorBoxNode::needShapeInfer() const { return outputShape[1] != output; } -std::vector MKLDNNPriorBoxNode::shapeInfer() const { +std::vector PriorBox::shapeInfer() const { const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const int H = in_data[0]; const int W = in_data[1]; @@ -130,11 +134,11 @@ std::vector MKLDNNPriorBoxNode::shapeInfer() const { return {{2, output}}; } -bool MKLDNNPriorBoxNode::needPrepareParams() const { +bool PriorBox::needPrepareParams() const { return false; } -void MKLDNNPriorBoxNode::initSupportedPrimitiveDescriptors() { +void PriorBox::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -144,7 +148,7 @@ void MKLDNNPriorBoxNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNPriorBoxNode::createPrimitive() { +void PriorBox::createPrimitive() { if (inputShapesDefined()) { if (needPrepareParams()) prepareParams(); @@ -152,7 +156,7 @@ void MKLDNNPriorBoxNode::createPrimitive() { } } -void MKLDNNPriorBoxNode::execute(mkldnn::stream strm) { +void PriorBox::execute(mkldnn::stream strm) { const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const int H = in_data[0]; const int W = in_data[1]; @@ -317,8 +321,10 @@ void MKLDNNPriorBoxNode::execute(mkldnn::stream strm) { } } -bool MKLDNNPriorBoxNode::created() const { - return getType() == PriorBox; +bool PriorBox::created() const { + return getType() == Type::PriorBox; } -REG_MKLDNN_PRIM_FOR(MKLDNNPriorBoxNode, PriorBox) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.h b/src/plugins/intel_cpu/src/nodes/priorbox.h index 4d9623903c3..27f6c0547b8 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.h +++ b/src/plugins/intel_cpu/src/nodes/priorbox.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNPriorBoxNode : public MKLDNNNode { +class PriorBox : public Node { public: - MKLDNNPriorBoxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + PriorBox(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -50,5 +51,6 @@ private: int number_of_priors; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff 
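For the PriorBox node above, shapeInfer returns {2, output}, where output depends on the feature-map size read from the first input and the number of priors per cell (row 0 holds the box coordinates, row 1 the variances, 4 values per prior). A worked example with assumed values:

#include <cstdio>

int main() {
    const int H = 10, W = 10, number_of_priors = 6;   // assumed example values
    const int output = 4 * number_of_priors * H * W;  // 4 values per prior per feature-map cell
    std::printf("PriorBox output shape: {2, %d}\n", output);  // {2, 2400}
    return 0;
}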
--git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp index 515ef10d1f1..8064ee9eca3 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp @@ -14,10 +14,13 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNPriorBoxClusteredNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool PriorBoxClustered::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto priorBox = std::dynamic_pointer_cast(op); if (!priorBox) { @@ -30,10 +33,10 @@ bool MKLDNNPriorBoxClusteredNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -58,7 +61,7 @@ MKLDNNPriorBoxClusteredNode::MKLDNNPriorBoxClusteredNode( } } -bool MKLDNNPriorBoxClusteredNode::needShapeInfer() const { +bool PriorBoxClustered::needShapeInfer() const { auto& memory = getChildEdgeAt(0)->getMemoryPtr(); if (memory->GetShape().isDynamic()) { return true; @@ -73,7 +76,7 @@ bool MKLDNNPriorBoxClusteredNode::needShapeInfer() const { return outputShape[1] != output; } -std::vector MKLDNNPriorBoxClusteredNode::shapeInfer() const { +std::vector PriorBoxClustered::shapeInfer() const { const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const int H = in_data[0]; const int W = in_data[1]; @@ -81,11 +84,11 @@ std::vector MKLDNNPriorBoxClusteredNode::shapeInfer() const { return {{2, output}}; } -bool MKLDNNPriorBoxClusteredNode::needPrepareParams() const { +bool PriorBoxClustered::needPrepareParams() const { return false; } -void MKLDNNPriorBoxClusteredNode::initSupportedPrimitiveDescriptors() { +void PriorBoxClustered::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -95,7 +98,7 @@ void MKLDNNPriorBoxClusteredNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNPriorBoxClusteredNode::createPrimitive() { +void PriorBoxClustered::createPrimitive() { if (inputShapesDefined()) { if (needPrepareParams()) prepareParams(); @@ -103,7 +106,7 @@ void MKLDNNPriorBoxClusteredNode::createPrimitive() { } } -void MKLDNNPriorBoxClusteredNode::execute(mkldnn::stream strm) { +void PriorBoxClustered::execute(mkldnn::stream strm) { const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const int layer_height = in_data[0]; const int layer_width = in_data[1]; @@ -163,8 +166,10 @@ void MKLDNNPriorBoxClusteredNode::execute(mkldnn::stream strm) { }); } -bool MKLDNNPriorBoxClusteredNode::created() const { - return getType() == PriorBoxClustered; +bool PriorBoxClustered::created() const { + return getType() == Type::PriorBoxClustered; } -REG_MKLDNN_PRIM_FOR(MKLDNNPriorBoxClusteredNode, PriorBoxClustered) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h index 50799f67758..ff17187c7e8 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.h @@ -12,10 +12,11 @@ 
namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNPriorBoxClusteredNode : public MKLDNNNode { +class PriorBoxClustered : public Node { public: - MKLDNNPriorBoxClusteredNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + PriorBoxClustered(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -44,5 +45,6 @@ private: int number_of_priors; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/proposal.cpp b/src/plugins/intel_cpu/src/nodes/proposal.cpp index 96992636b10..bc94b34e5c7 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.cpp +++ b/src/plugins/intel_cpu/src/nodes/proposal.cpp @@ -9,6 +9,12 @@ #include "ie_parallel.hpp" #include "proposal.h" +using namespace InferenceEngine; + +namespace ov { +namespace intel_cpu { +namespace node { + static std::vector generate_anchors(proposal_conf &conf) { auto base_size = conf.base_size_; auto coordinates_offset = conf.coordinates_offset; @@ -68,10 +74,7 @@ static std::vector generate_anchors(proposal_conf &conf) { return anchors; } -using namespace ov::intel_cpu; -using namespace InferenceEngine; - -bool MKLDNNProposalNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Proposal::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto proposal0Op = ngraph::as_type_ptr(op); const auto proposal4Op = ngraph::as_type_ptr(op); @@ -90,8 +93,8 @@ bool MKLDNNProposalNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +Proposal::Proposal(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -135,7 +138,7 @@ MKLDNNProposalNode::MKLDNNProposalNode(const std::shared_ptr& op, store_prob = op->get_output_size() == 2; } -void MKLDNNProposalNode::initSupportedPrimitiveDescriptors() { +void Proposal::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -155,11 +158,11 @@ void MKLDNNProposalNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNProposalNode::executeDynamicImpl(mkldnn::stream strm) { +void Proposal::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNProposalNode::execute(mkldnn::stream strm) { +void Proposal::execute(mkldnn::stream strm) { try { const float* probabilitiesData = reinterpret_cast(getParentEdgeAt(PROBABILITIES_IN_IDX)->getMemoryPtr()->GetPtr()); const float* anchorsData = reinterpret_cast(getParentEdgeAt(ANCHORS_IN_IDX)->getMemoryPtr()->GetPtr()); @@ -194,8 +197,10 @@ void MKLDNNProposalNode::execute(mkldnn::stream strm) { } } -bool MKLDNNProposalNode::created() const { - return getType() == Proposal; +bool Proposal::created() const { + return getType() == Type::Proposal; } -REG_MKLDNN_PRIM_FOR(MKLDNNProposalNode, Proposal) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/proposal.h b/src/plugins/intel_cpu/src/nodes/proposal.h index ed6e866e562..a7c17f1e7ee 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.h +++ b/src/plugins/intel_cpu/src/nodes/proposal.h @@ -12,10 +12,11 @@ using proposal_conf = 
InferenceEngine::Extensions::Cpu::proposal_conf; namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNProposalNode : public MKLDNNNode { +class Proposal : public Node { public: - MKLDNNProposalNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Proposal(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -40,5 +41,6 @@ private: bool store_prob; // store blob with proposal probabilities }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp index e75e4741adb..77f40680172 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp @@ -14,14 +14,17 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; -bool MKLDNNPSROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool PSROIPooling::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -54,8 +57,8 @@ bool MKLDNNPSROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +PSROIPooling::PSROIPooling(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -125,7 +128,7 @@ MKLDNNPSROIPoolingNode::MKLDNNPSROIPoolingNode(const std::shared_ptr(outDims[3]); } -void MKLDNNPSROIPoolingNode::initSupportedPrimitiveDescriptors() { +void PSROIPooling::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -186,7 +189,7 @@ inline float bilinearInterp(const inputType* data, const float x, const float y, return value; } -void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, +void PSROIPooling::unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, int& inBlockSize, int& outBlockSize, @@ -229,7 +232,7 @@ void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, cons } template -void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, +void PSROIPooling::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; @@ -312,7 +315,7 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType } template -void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, +void PSROIPooling::executeBilinear(const inputType *srcData, outputType *dstData, const 
float *bottomRois, const int currentRoi, const int roiBatchInd, const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; @@ -415,7 +418,7 @@ void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputTyp } template -void MKLDNNPSROIPoolingNode::executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, +void PSROIPooling::executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, const float *bottomTrans, const int numClasses, const int channelsEachClass, const int currentRoi, const int roiBatchInd) { const float roiStartW = static_cast(round(bottomRois[1])) * spatialScale - 0.5f; @@ -478,7 +481,7 @@ void MKLDNNPSROIPoolingNode::executeBilinearDeformable(const inputType *srcData, } template -void MKLDNNPSROIPoolingNode::executeSpecified() { +void PSROIPooling::executeSpecified() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -523,12 +526,12 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { namespace { struct PSROIPoolingContext { - MKLDNNPSROIPoolingNode &node; + PSROIPooling &node; }; } template -struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { +struct PSROIPooling::PSROIPoolingExecute { using srcT = typename std::tuple_element<0, T>::type; using dstT = typename std::tuple_element<1, T>::type; @@ -537,7 +540,7 @@ struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { } }; -void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { +void PSROIPooling::execute(mkldnn::stream strm) { auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); @@ -555,8 +558,10 @@ void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t)) } -bool MKLDNNPSROIPoolingNode::created() const { - return getType() == PSROIPooling; +bool PSROIPooling::created() const { + return getType() == Type::PSROIPooling; } -REG_MKLDNN_PRIM_FOR(MKLDNNPSROIPoolingNode, PSROIPooling) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.h b/src/plugins/intel_cpu/src/nodes/psroi_pooling.h index 32a5b1ea912..9715609cba1 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNPSROIPoolingNode : public MKLDNNNode { +class PSROIPooling : public Node { public: - MKLDNNPSROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + PSROIPooling(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -80,5 +81,6 @@ private: struct PSROIPoolingExecute; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/range.cpp b/src/plugins/intel_cpu/src/nodes/range.cpp index 279d88a2471..02b26efdb10 100644 --- a/src/plugins/intel_cpu/src/nodes/range.cpp +++ b/src/plugins/intel_cpu/src/nodes/range.cpp @@ -8,12 
+8,15 @@ #include "range.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNRangeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Range::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (!ov::intel_cpu::one_of(op->get_type_info(), ngraph::op::v0::Range::get_type_info_static(), ngraph::op::v4::Range::get_type_info_static())) { + if (!one_of(op->get_type_info(), ngraph::op::v0::Range::get_type_info_static(), ngraph::op::v4::Range::get_type_info_static())) { errorMessage = "Only opset1 and opset4 Range operation is supported"; return false; } @@ -23,8 +26,8 @@ bool MKLDNNRangeNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +Range::Range(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -52,7 +55,7 @@ MKLDNNRangeNode::MKLDNNRangeNode(const std::shared_ptr& op, const IE_THROW() << errorPrefix << " has unsupported rank for output: " << dstRank; } -void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { +void Range::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -83,15 +86,15 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { } } -std::vector MKLDNNRangeNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(RANGE_START, RANGE_LIMIT, RANGE_DELTA)); +std::vector Range::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(RANGE_START, RANGE_LIMIT, RANGE_DELTA)); } -void MKLDNNRangeNode::executeDynamicImpl(mkldnn::stream strm) { +void Range::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNRangeNode::execute(mkldnn::stream strm) { +void Range::execute(mkldnn::stream strm) { StatusCode retcode = OK; switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision()) { case Precision::FP32: @@ -110,7 +113,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) { } template -size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) const { +size_t Range::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) const { data_t start = 0, limit = 0, delta = 0; if (startPtr == nullptr) startPtr = &start; @@ -133,7 +136,7 @@ size_t MKLDNNRangeNode::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t } template -InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() { +InferenceEngine::StatusCode Range::rangeKernel() { data_t start = 0, delta = 0; size_t work_amount_dst = getWorkAmount(&start, nullptr, &delta); if (isDynamicNode()) { @@ -151,8 +154,11 @@ InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() { }); return OK; } -bool MKLDNNRangeNode::created() const { - return getType() == Range; + +bool Range::created() const { + return getType() == Type::Range; } -REG_MKLDNN_PRIM_FOR(MKLDNNRangeNode, Range) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/range.h b/src/plugins/intel_cpu/src/nodes/range.h index cd147193d05..b7b0ec7d05e 100644 --- a/src/plugins/intel_cpu/src/nodes/range.h +++ b/src/plugins/intel_cpu/src/nodes/range.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class 
MKLDNNRangeNode : public MKLDNNNode { +class Range : public Node { public: - MKLDNNRangeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Range(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -37,5 +38,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index 34dadc7349d..afcb219f4cd 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "utils/bfloat16.hpp" #include "emitters/jit_bf16_emitters.hpp" #include "ie_parallel.hpp" @@ -27,7 +27,6 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; @@ -71,7 +70,11 @@ using namespace Xbyak; #define GET_PTR_NCD_BASE_PTR_N_BLK const uint8_t *in_ptr_ncd = in_ptr_n + src_data_size * (icb * ID + id) * IH * IW * blk_size; \ uint8_t *out_ptr_ncd = out_ptr_n + dst_data_size * (ocb * OD + od) * OH * OW * blk_size; +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct ReduceKey { jit_reduce_config_params jcp; mkldnn::post_ops postOps; @@ -118,7 +121,7 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene } void generate() override { - if (jcp_.reduce_mode == ReduceLogSumExp) { + if (jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { exp_injector = std::make_shared>(this, alg_kind::eltwise_exp, 0.f, 0.f, 1); } @@ -136,15 +139,15 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene if (planar_layout) mov(reg_reduce_w, ptr[reg_params + GET_OFF(reduce_w)]); - if (jcp_.reduce_mode == ReduceAnd || jcp_.reduce_mode == ReduceL1 || jcp_.reduce_mode == ReduceMax || - jcp_.reduce_mode == ReduceMin || jcp_.reduce_mode == ReduceProd || jcp_.reduce_mode == ReduceOr) { + if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax || + jcp_.reduce_mode == Algorithm::ReduceMin || jcp_.reduce_mode == Algorithm::ReduceProd || jcp_.reduce_mode == Algorithm::ReduceOr) { mov(reg_table, l_table); } - if (isa == cpu::x64::avx512_common || jcp_.reduce_mode == ReduceAnd || jcp_.reduce_mode == ReduceOr) + if (isa == cpu::x64::avx512_common || jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceOr) uni_vpxor(vmm_zero, vmm_zero, vmm_zero); - if ((isa == cpu::x64::avx512_common && jcp_.reduce_mode == ReduceAnd) || jcp_.reduce_mode == ReduceOr) { + if ((isa == cpu::x64::avx512_common && jcp_.reduce_mode == Algorithm::ReduceAnd) || jcp_.reduce_mode == Algorithm::ReduceOr) { uni_vmovups(vmm_aux, table_val(0)); } @@ -156,10 +159,10 @@ struct jit_uni_reduce_kernel_f32 : public jit_uni_reduce_kernel, public jit_gene if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) emu_vcvtneps2bf16->emit_data(); - if (jcp_.reduce_mode == ReduceAnd || jcp_.reduce_mode == ReduceL1 || jcp_.reduce_mode == ReduceMax || - jcp_.reduce_mode == ReduceMin || jcp_.reduce_mode == ReduceProd || jcp_.reduce_mode == ReduceOr) { + if (jcp_.reduce_mode == Algorithm::ReduceAnd || jcp_.reduce_mode == Algorithm::ReduceL1 || jcp_.reduce_mode == Algorithm::ReduceMax || + 
jcp_.reduce_mode == Algorithm::ReduceMin || jcp_.reduce_mode == Algorithm::ReduceProd || jcp_.reduce_mode == Algorithm::ReduceOr) { prepare_aux_table(); - } else if (jcp_.reduce_mode == ReduceLogSumExp) { + } else if (jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { exp_injector->prepare_table(); } } @@ -288,7 +291,7 @@ private: cmp(reg_work_amount, step); jl(reduce_main_end_label, T_NEAR); //avoid illegal loading and storing - if (jcp_.reduce_mode == ReduceL1) { + if (jcp_.reduce_mode == Algorithm::ReduceL1) { uni_vmovups(vmm_aux, table_val(1)); } @@ -310,30 +313,30 @@ private: { // init dst, dst loading is embedded in horiz_reduce_store switch (jcp_.reduce_mode) { - case ReduceAnd: - case ReduceProd: + case Algorithm::ReduceAnd: + case Algorithm::ReduceProd: uni_vmovups(vmm_dst, table_val(0)); break; - case ReduceL1: + case Algorithm::ReduceL1: uni_vmovups(vmm_aux, table_val(1)); uni_vpxor(vmm_dst, vmm_dst, vmm_dst); break; - case ReduceL2: - case ReduceLogSum: - case ReduceLogSumExp: - case ReduceMean: - case ReduceOr: - case ReduceSum: - case ReduceSumSquare: + case Algorithm::ReduceL2: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceLogSumExp: + case Algorithm::ReduceMean: + case Algorithm::ReduceOr: + case Algorithm::ReduceSum: + case Algorithm::ReduceSumSquare: uni_vpxor(vmm_dst, vmm_dst, vmm_dst); break; - case ReduceMax: + case Algorithm::ReduceMax: if (isFloatCompatible(jcp_.dst_dt)) uni_vmovups(vmm_dst, table_val(2)); else uni_vmovups(vmm_dst, table_val(4)); break; - case ReduceMin: + case Algorithm::ReduceMin: if (isFloatCompatible(jcp_.dst_dt)) uni_vmovups(vmm_dst, table_val(3)); else @@ -344,7 +347,7 @@ private: } // reduce reduce_main_loop(); - if (jcp_.reduce_mode == ReduceOr && isa != cpu::x64::avx512_common) { + if (jcp_.reduce_mode == Algorithm::ReduceOr && isa != cpu::x64::avx512_common) { uni_cmpneqps(vmm_dst, vmm_dst, vmm_zero); uni_vandps(vmm_dst, vmm_dst, vmm_aux); } @@ -366,7 +369,7 @@ private: mov(reg_idx, ptr[reg_params + GET_OFF(idx)]); uni_vmovdqu(vmm_idx, ptr[reg_idx]); - if (jcp_.reduce_mode == ReduceL1) { + if (jcp_.reduce_mode == Algorithm::ReduceL1) { uni_vmovups(vmm_aux, table_val(1)); } @@ -402,7 +405,7 @@ private: } inline void reduce_tail() { - if (jcp_.reduce_mode == ReduceL1) { + if (jcp_.reduce_mode == Algorithm::ReduceL1) { uni_vmovups(xmm_aux, table_val(1)); } @@ -443,7 +446,7 @@ private: load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt); reduce_kernel_scalar(xmm_src, xmm_dst); - if (jcp_.reduce_mode == ReduceOr) { + if (jcp_.reduce_mode == Algorithm::ReduceOr) { uni_cmpneqps(xmm_dst, xmm_dst, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_aux); } @@ -677,7 +680,7 @@ private: inline void reduce_once_tail() { load_scalar(xmm_src, ptr[reg_src], jcp_.src_dt); reduce_kernel_scalar(xmm_src, xmm_dst); - if (jcp_.reduce_mode == ReduceOr) { + if (jcp_.reduce_mode == Algorithm::ReduceOr) { uni_cmpneqps(xmm_dst, xmm_dst, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_aux); } @@ -696,7 +699,7 @@ private: load_scalar(xmm_src, ptr[reg_src_aux], jcp_.src_dt); reduce_kernel_scalar(xmm_src, xmm_dst); - if (jcp_.reduce_mode == ReduceOr) { + if (jcp_.reduce_mode == Algorithm::ReduceOr) { uni_cmpneqps(xmm_dst, xmm_dst, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_aux); } @@ -736,7 +739,7 @@ private: inline void reduce_kernel(Vmm vmm_src, Vmm vmm_dst) { switch (jcp_.reduce_mode) { - case ReduceAnd: + case Algorithm::ReduceAnd: if (isa == cpu::x64::avx512_common) { vcmpps(k_mask, vmm_src, vmm_zero, _cmp_neq_uq); vblendmps(vmm_src | k_mask, vmm_zero, vmm_aux); @@ 
-745,38 +748,38 @@ private: } uni_vandps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceL1: + case Algorithm::ReduceL1: uni_vandps(vmm_src, vmm_src, vmm_aux); uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceLogSum: - case ReduceMean: - case ReduceSum: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceMean: + case Algorithm::ReduceSum: uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceMax: + case Algorithm::ReduceMax: uni_vmaxps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceMin: + case Algorithm::ReduceMin: uni_vminps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceL2: - case ReduceSumSquare: + case Algorithm::ReduceL2: + case Algorithm::ReduceSumSquare: uni_vmulps(vmm_src, vmm_src, vmm_src); uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceLogSumExp: + case Algorithm::ReduceLogSumExp: exp_injector->compute_vector_range(vmm_src.getIdx(), vmm_src.getIdx() + 1); uni_vaddps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceOr: + case Algorithm::ReduceOr: if (isa == cpu::x64::avx512_common) { vcmpps(k_mask, vmm_src, vmm_zero, _cmp_neq_uq); vblendmps(vmm_src | k_mask, vmm_zero, vmm_aux); } uni_vorps(vmm_dst, vmm_dst, vmm_src); break; - case ReduceProd: + case Algorithm::ReduceProd: uni_vmulps(vmm_dst, vmm_dst, vmm_src); break; default: @@ -786,38 +789,38 @@ private: inline void reduce_kernel_scalar(Xmm xmm_src, Xmm xmm_dst) { switch (jcp_.reduce_mode) { - case ReduceAnd: + case Algorithm::ReduceAnd: uni_cmpneqps(xmm_src, xmm_src, xmm_zero); uni_vandps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceL1: + case Algorithm::ReduceL1: uni_vandps(xmm_src, xmm_src, xmm_aux); uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceLogSum: - case ReduceMean: - case ReduceSum: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceMean: + case Algorithm::ReduceSum: uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceMax: + case Algorithm::ReduceMax: uni_vmaxps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceMin: + case Algorithm::ReduceMin: uni_vminps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceL2: - case ReduceSumSquare: + case Algorithm::ReduceL2: + case Algorithm::ReduceSumSquare: uni_vmulps(xmm_src, xmm_src, xmm_src); uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceLogSumExp: + case Algorithm::ReduceLogSumExp: exp_injector->compute_vector_range(xmm_src.getIdx(), xmm_src.getIdx() + 1); uni_vaddps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceOr: + case Algorithm::ReduceOr: uni_vorps(xmm_dst, xmm_dst, xmm_src); break; - case ReduceProd: + case Algorithm::ReduceProd: uni_vmulps(xmm_dst, xmm_dst, xmm_src); break; default: @@ -832,7 +835,7 @@ private: } inline void store_dst_vector() { - if (jcp_.reduce_mode == ReduceOr && isa != cpu::x64::avx512_common) { + if (jcp_.reduce_mode == Algorithm::ReduceOr && isa != cpu::x64::avx512_common) { uni_cmpneqps(vmm_dst, vmm_dst, vmm_zero); uni_vandps(vmm_dst, vmm_dst, vmm_aux); @@ -1018,28 +1021,28 @@ private: inline void horiz_ps(const Xmm& xmm, const Operand& op) { switch (jcp_.reduce_mode) { - case ReduceAnd: + case Algorithm::ReduceAnd: uni_vandps(xmm, xmm, op); break; - case ReduceL1: - case ReduceL2: - case ReduceLogSum: - case ReduceMean: - case ReduceSum: - case ReduceSumSquare: - case ReduceLogSumExp: + case Algorithm::ReduceL1: + case Algorithm::ReduceL2: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceMean: + case Algorithm::ReduceSum: + case Algorithm::ReduceSumSquare: + case Algorithm::ReduceLogSumExp: uni_vaddps(xmm, xmm, op); break; - case ReduceMax: + case Algorithm::ReduceMax: 
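The reduce kernel hunks above and below qualify every reduce mode with the scoped Algorithm enum, so case ReduceMax: becomes case Algorithm::ReduceMax: and the comparisons follow suit. The real code emits JIT instructions such as uni_vmaxps; the scalar sketch below only illustrates the enum usage, with an abbreviated stand-in for the Algorithm enum from cpu_types.h:

    #include <algorithm>

    // abbreviated stand-in for the plugin's Algorithm enum
    enum class Algorithm { ReduceAnd, ReduceMax, ReduceMin, ReduceSum, ReduceProd };

    // was: switch (reduce_mode) { case ReduceMax: ... }  -- unscoped enumerators
    float accumulate(Algorithm reduce_mode, float acc, float value) {
        switch (reduce_mode) {
        case Algorithm::ReduceMax:  return std::max(acc, value);
        case Algorithm::ReduceMin:  return std::min(acc, value);
        case Algorithm::ReduceSum:  return acc + value;
        case Algorithm::ReduceProd: return acc * value;
        case Algorithm::ReduceAnd:  return (acc != 0.f && value != 0.f) ? 1.f : 0.f;
        default:                    return acc;
        }
    }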
uni_vmaxps(xmm, xmm, op); break; - case ReduceMin: + case Algorithm::ReduceMin: uni_vminps(xmm, xmm, op); break; - case ReduceOr: + case Algorithm::ReduceOr: uni_vorps(xmm, xmm, op); break; - case ReduceProd: + case Algorithm::ReduceProd: uni_vmulps(xmm, xmm, op); break; default: @@ -1103,7 +1106,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi } } - if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { + if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { log_injector = std::make_shared>(this, alg_kind::eltwise_log, 0.f, 0.f, 1.f); } @@ -1159,7 +1162,7 @@ struct jit_uni_reduce_post_kernel_f32 : public jit_uni_reduce_post_kernel, publi if (!mayiuse(avx512_core_bf16) && mayiuse(avx512_core)) emu_vcvtneps2bf16->emit_data(); - if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { + if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { log_injector->prepare_table(); } @@ -1259,9 +1262,9 @@ private: // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean] L(reduce_map_label); { - if (jcp_.reduce_mode == ReduceL2 || jcp_.reduce_mode == ReduceMean || - jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { - if (jcp_.reduce_mode == ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceL2 || jcp_.reduce_mode == Algorithm::ReduceMean || + jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { + if (jcp_.reduce_mode == Algorithm::ReduceMean) uni_vbroadcastss(vmm_aux, ptr[reg_divisor]); Xbyak::Label reduce_loop_label; @@ -1341,9 +1344,9 @@ private: inline void reduce_post_tail() { // reduce map for tail in dst memory // cases: [ReduceL2] [ReduceLogSum] [ReduceLogSumExp] [ReduceMean] in planar layout - if (jcp_.reduce_mode == ReduceL2 || jcp_.reduce_mode == ReduceMean || - jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) { - if (jcp_.reduce_mode == ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceL2 || jcp_.reduce_mode == Algorithm::ReduceMean || + jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) { + if (jcp_.reduce_mode == Algorithm::ReduceMean) uni_vbroadcastss(xmm_aux, ptr[reg_divisor]); Xbyak::Label reduce_loop_label; @@ -1448,20 +1451,20 @@ private: } inline void reduce_map_kernel(Vmm vmm_dst) { - if (jcp_.reduce_mode == ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceMean) uni_vdivps(vmm_dst, vmm_dst, vmm_aux); - else if (jcp_.reduce_mode == ReduceL2) + else if (jcp_.reduce_mode == Algorithm::ReduceL2) uni_vsqrtps(vmm_dst, vmm_dst); - else if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) + else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) log_injector->compute_vector_range(vmm_dst.getIdx(), vmm_dst.getIdx() + 1); } inline void reduce_map_kernel_scalar(Xmm xmm_dst) { - if (jcp_.reduce_mode == ReduceMean) + if (jcp_.reduce_mode == Algorithm::ReduceMean) uni_vdivps(xmm_dst, xmm_dst, xmm_aux); - else if (jcp_.reduce_mode == ReduceL2) + else if (jcp_.reduce_mode == Algorithm::ReduceL2) uni_vsqrtps(xmm_dst, xmm_dst); - else if (jcp_.reduce_mode == ReduceLogSum || jcp_.reduce_mode == ReduceLogSumExp) + else if (jcp_.reduce_mode == Algorithm::ReduceLogSum || jcp_.reduce_mode == Algorithm::ReduceLogSumExp) log_injector->compute_vector_range(xmm_dst.getIdx(), xmm_dst.getIdx() + 1); } @@ 
-1637,28 +1640,28 @@ private: inline void horiz_ps(const Xmm& xmm, const Operand& op) { switch (jcp_.reduce_mode) { - case ReduceAnd: + case Algorithm::ReduceAnd: uni_vandps(xmm, xmm, op); break; - case ReduceL1: - case ReduceL2: - case ReduceLogSum: - case ReduceMean: - case ReduceSum: - case ReduceSumSquare: - case ReduceLogSumExp: + case Algorithm::ReduceL1: + case Algorithm::ReduceL2: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceMean: + case Algorithm::ReduceSum: + case Algorithm::ReduceSumSquare: + case Algorithm::ReduceLogSumExp: uni_vaddps(xmm, xmm, op); break; - case ReduceMax: + case Algorithm::ReduceMax: uni_vmaxps(xmm, xmm, op); break; - case ReduceMin: + case Algorithm::ReduceMin: uni_vminps(xmm, xmm, op); break; - case ReduceOr: + case Algorithm::ReduceOr: uni_vorps(xmm, xmm, op); break; - case ReduceProd: + case Algorithm::ReduceProd: uni_vmulps(xmm, xmm, op); break; default: @@ -1667,37 +1670,37 @@ private: } }; -const std::map&, MKLDNNReduceNode&)>> MKLDNNReduceNode::initializers = { - {ngraph::opset4::ReduceL1::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceL1; +const std::map&, Reduce&)>> Reduce::initializers = { + {ngraph::opset4::ReduceL1::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceL1; }}, - {ngraph::opset4::ReduceL2::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceL2; + {ngraph::opset4::ReduceL2::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceL2; }}, - {ngraph::opset1::ReduceLogicalAnd::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceAnd; + {ngraph::opset1::ReduceLogicalAnd::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceAnd; }}, - {ngraph::opset1::ReduceLogicalOr::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceOr; + {ngraph::opset1::ReduceLogicalOr::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceOr; }}, - {ngraph::opset1::ReduceMax::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceMax; + {ngraph::opset1::ReduceMax::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceMax; }}, - {ngraph::opset1::ReduceMean::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceMean; + {ngraph::opset1::ReduceMean::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceMean; }}, - {ngraph::opset1::ReduceMin::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceMin; + {ngraph::opset1::ReduceMin::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceMin; }}, - {ngraph::opset1::ReduceProd::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceProd; + {ngraph::opset1::ReduceProd::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceProd; }}, - {ngraph::opset1::ReduceSum::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNReduceNode& node) { - node.algorithm = ReduceSum; + 
{ngraph::opset1::ReduceSum::get_type_info_static(), [](const std::shared_ptr& op, Reduce& node) { + node.algorithm = Algorithm::ReduceSum; }} }; -bool MKLDNNReduceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Reduce::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (std::dynamic_pointer_cast(op) == nullptr && std::dynamic_pointer_cast(op) == nullptr) { @@ -1732,8 +1735,8 @@ bool MKLDNNReduceNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +Reduce::Reduce(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Reduce node with name '" + getName() + "'"; @@ -1757,7 +1760,7 @@ MKLDNNReduceNode::MKLDNNReduceNode(const std::shared_ptr& op, cons } } -void MKLDNNReduceNode::getSupportedDescriptors() { +void Reduce::getSupportedDescriptors() { if (!descs.empty()) return; @@ -1782,7 +1785,7 @@ void MKLDNNReduceNode::getSupportedDescriptors() { } } -void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { +void Reduce::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -1801,8 +1804,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { if (Precision::BF16 == output_prec) { if (!mayiuse(avx512_core)) { output_prec = Precision::FP32; - } else if (algorithm != ReduceAnd && algorithm != ReduceOr && - algorithm != ReduceMin && algorithm != ReduceMax) { + } else if (algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr && + algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax) { output_prec = Precision::FP32; } } @@ -1867,15 +1870,15 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNReduceNode::isExecutable() const { +bool Reduce::isExecutable() const { return !isInputTensorAtPortEmpty(REDUCE_DATA); } -std::vector MKLDNNReduceNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(REDUCE_INDEXES)); +std::vector Reduce::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(REDUCE_INDEXES)); } -void MKLDNNReduceNode::prepareParams() { +void Reduce::prepareParams() { src_dims = getParentEdgesAtPort(REDUCE_DATA)[0]->getMemory().getDesc().getShape().getDims(); std::vector reduce_axes; if (jit_mode && jit_beyond_5D) { @@ -1927,7 +1930,7 @@ void MKLDNNReduceNode::prepareParams() { } } -void MKLDNNReduceNode::createPrimitive() { +void Reduce::createPrimitive() { if (!isExecutable()) { return; } @@ -1956,10 +1959,10 @@ void MKLDNNReduceNode::createPrimitive() { auto selectedPD = getSelectedPrimitiveDescriptor(); jcp = jit_reduce_config_params(); - jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].getMemDesc()->getPrecision()); - jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].getMemDesc()->getPrecision()); - jcp.src_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.src_dt); - jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); + jcp.src_dt = DnnlExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].getMemDesc()->getPrecision()); + jcp.dst_dt = DnnlExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().outConfs[0].getMemDesc()->getPrecision()); + jcp.src_data_size = 
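The initializers map rewritten above keeps its structure and only changes types: each ngraph type_info key maps to a lambda that records the matching Algorithm on the Reduce node. A reduced, self-contained sketch of that dispatch idea; std::type_index stands in for ngraph's type_info, the op argument is dropped, and ReduceStub stands in for node::Reduce:

    #include <functional>
    #include <map>
    #include <typeindex>

    enum class Algorithm { ReduceL1, ReduceMax, ReduceSum };

    struct ReduceStub { Algorithm algorithm; };                          // stands in for node::Reduce
    struct ReduceL1Op {}; struct ReduceMaxOp {}; struct ReduceSumOp {};  // stand-ins for the ngraph ops

    static const std::map<std::type_index, std::function<void(ReduceStub&)>> initializers = {
        {typeid(ReduceL1Op),  [](ReduceStub& node) { node.algorithm = Algorithm::ReduceL1;  }},
        {typeid(ReduceMaxOp), [](ReduceStub& node) { node.algorithm = Algorithm::ReduceMax; }},
        {typeid(ReduceSumOp), [](ReduceStub& node) { node.algorithm = Algorithm::ReduceSum; }},
    };

    // usage: initializers.at(typeid(ReduceMaxOp))(reduceNode);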
DnnlExtensionUtils::sizeOfDataType(jcp.src_dt); + jcp.dst_data_size = DnnlExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.layout = layout; jcp.reduce_mode = getAlgorithm(); @@ -1986,11 +1989,11 @@ void MKLDNNReduceNode::createPrimitive() { jit_mode = jit_mode && reduce_kernel; } -void MKLDNNReduceNode::executeDynamicImpl(mkldnn::stream strm) { +void Reduce::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNReduceNode::execute(mkldnn::stream strm) { +void Reduce::execute(mkldnn::stream strm) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); @@ -2013,7 +2016,7 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { } } -void MKLDNNReduceNode::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_size) { +void Reduce::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_size) { init_dst_data(out_ptr, dst_size); reduce_stride = IW; @@ -2039,7 +2042,7 @@ void MKLDNNReduceNode::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size } } -void MKLDNNReduceNode::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) { +void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) { if (ReduceN && !ReduceC && !ReduceD && !ReduceH && !ReduceW) { size_t IA = IC * ID * IH * IW; reduce_stride = IA; @@ -2193,7 +2196,7 @@ void MKLDNNReduceNode::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) { reduce_kernel_post_process(out_ptr); } -void MKLDNNReduceNode::reduce_BLK(const uint8_t *in_ptr, uint8_t *out_ptr) { +void Reduce::reduce_BLK(const uint8_t *in_ptr, uint8_t *out_ptr) { size_t ICB = div_up(IC, blk_size); size_t OCB = div_up(OC, blk_size); @@ -2205,8 +2208,8 @@ void MKLDNNReduceNode::reduce_BLK(const uint8_t *in_ptr, uint8_t *out_ptr) { reduce_kernel_process(in_ptr_ncd, out_ptr_ncd, IH * IW * blk_size); }); } else if (ReduceC && ReduceD && ReduceH && ReduceW) { - if (input_prec != output_prec || getAlgorithm() == ReduceL2 || - algorithm == ReduceLogSumExp || algorithm == ReduceSumSquare) { + if (input_prec != output_prec || getAlgorithm() == Algorithm::ReduceL2 || + algorithm == Algorithm::ReduceLogSumExp || algorithm == Algorithm::ReduceSumSquare) { reduce_kernel_process(in_ptr_n, out_ptr_n, ICB * ID * IH * IW * blk_size); } else { // reduce parallelly @@ -2263,7 +2266,7 @@ void MKLDNNReduceNode::reduce_BLK(const uint8_t *in_ptr, uint8_t *out_ptr) { reduce_kernel_post_process(out_ptr); } -void MKLDNNReduceNode::reduce_BLK_concern_padding(const uint8_t *in_ptr, uint8_t *out_ptr) { +void Reduce::reduce_BLK_concern_padding(const uint8_t *in_ptr, uint8_t *out_ptr) { size_t ICB = div_up(IC, blk_size); size_t OCB = div_up(OC, blk_size); @@ -2347,7 +2350,7 @@ void MKLDNNReduceNode::reduce_BLK_concern_padding(const uint8_t *in_ptr, uint8_t reduce_kernel_post_process(out_ptr); } -inline void MKLDNNReduceNode::reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, size_t work_amount, +inline void Reduce::reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, size_t work_amount, size_t reduce_w, size_t work_batch, const int *tab_idx) { auto arg = jit_reduce_call_args(); arg.src = static_cast(in_p); @@ -2361,7 +2364,7 @@ inline void MKLDNNReduceNode::reduce_kernel_process(const uint8_t *in_p, uint8_t (*reduce_kernel)(&arg); } -inline void MKLDNNReduceNode::reduce_kernel_post_process(uint8_t *out_ptr) { +inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) { const size_t integerDivisor = IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW); const float divisor = 
static_cast(integerDivisor); if (layout == ReduceLayoutType::reduce_ncsp || layout == ReduceLayoutType::reduce_nspc) { @@ -2392,7 +2395,7 @@ inline void MKLDNNReduceNode::reduce_kernel_post_process(uint8_t *out_ptr) { } } -void MKLDNNReduceNode::nspc2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) { +void Reduce::nspc2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) { // dimension reinterpret after nspc reusing routine reduce_PLN // demote -- nspc -- ncsp // DIM0 -- B -- B @@ -2447,7 +2450,7 @@ void MKLDNNReduceNode::nspc2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) { } } -void MKLDNNReduceNode::blocked2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) { +void Reduce::blocked2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) { const size_t DIM0 = OB; const size_t DIM1 = OC; const size_t DIM2 = OD; @@ -2523,20 +2526,20 @@ void MKLDNNReduceNode::blocked2ncsp(uint8_t *proc_ptr, uint8_t *out_ptr) { } } -inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { +inline void Reduce::init_dst_data(uint8_t *out_ptr, size_t dst_size) { switch (algorithm) { - case ReduceL1: - case ReduceL2: - case ReduceLogSum: - case ReduceLogSumExp: - case ReduceMean: - case ReduceOr: - case ReduceSum: - case ReduceSumSquare: + case Algorithm::ReduceL1: + case Algorithm::ReduceL2: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceLogSumExp: + case Algorithm::ReduceMean: + case Algorithm::ReduceOr: + case Algorithm::ReduceSum: + case Algorithm::ReduceSumSquare: memset(out_ptr, 0, dst_size); break; - case ReduceAnd: - case ReduceProd: + case Algorithm::ReduceAnd: + case Algorithm::ReduceProd: if (output_prec == Precision::FP32) { auto out_p = reinterpret_cast(out_ptr); parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast(1); }); @@ -2554,7 +2557,7 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = static_cast(1); }); } break; - case ReduceMax: + case Algorithm::ReduceMax: if (output_prec == Precision::FP32) { auto out_p = reinterpret_cast(out_ptr); parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits::lowest(); }); @@ -2572,7 +2575,7 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits::min(); }); } break; - case ReduceMin: + case Algorithm::ReduceMin: if (output_prec == Precision::FP32) { auto out_p = reinterpret_cast(out_ptr); parallel_for(dst_size / dst_data_size, [&](size_t i) { out_p[i] = std::numeric_limits::max(); }); @@ -2595,18 +2598,18 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { } } -inline void MKLDNNReduceNode::create_working_memory() { +inline void Reduce::create_working_memory() { auto rank = getInputShapeAtPort(REDUCE_DATA).getRank(); memory::format_tag format = (layout == ReduceLayoutType::reduce_nspc) ? (rank == 4 ? memory::format_tag::nhwc : memory::format_tag::ndhwc) : (rank == 4 ? (mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nChw16c : memory::format_tag::nChw8c) : (mayiuse(cpu::x64::avx512_common) ? memory::format_tag::nCdhw16c : memory::format_tag::nCdhw8c)); auto prc_dims = rank == 4 ? 
std::vector{OB, OC, OH, OW} : std::vector{OB, OC, OD, OH, OW}; - auto desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(prc_dims), MKLDNNExtensionUtils::IEPrecisionToDataType(output_prec), format); + auto desc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(prc_dims), DnnlExtensionUtils::IEPrecisionToDataType(output_prec), format); prc_mem = std::make_shared(desc, getEngine()); dst_size = desc.get_size(); } -inline void MKLDNNReduceNode::calc_process_dst_dims(std::vector &reduce_axes, const SizeVector &dst_dims) { +inline void Reduce::calc_process_dst_dims(std::vector &reduce_axes, const SizeVector &dst_dims) { std::set axes; SizeVector out_dims; process_dst_dims.clear(); @@ -2647,7 +2650,7 @@ inline void MKLDNNReduceNode::calc_process_dst_dims(std::vector &reduce_axe } } -inline void MKLDNNReduceNode::set_reduce_dim_flags() { +inline void Reduce::set_reduce_dim_flags() { size_t dims_size = src_dims.size(); if (dims_size == 5) { SET_SRC_DIM_VALUE(src_dims[0], src_dims[1], src_dims[2], src_dims[3], src_dims[4]); @@ -2698,44 +2701,44 @@ inline void MKLDNNReduceNode::set_reduce_dim_flags() { } } -inline void MKLDNNReduceNode::reduce_ref(const float *in_ptr, float *out_ptr) { +inline void Reduce::reduce_ref(const float *in_ptr, float *out_ptr) { switch (algorithm) { - case ReduceAnd: + case Algorithm::ReduceAnd: reduce_ref_process(in_ptr, out_ptr, 1, [](float x, float y)->float { return x && y; }); break; - case ReduceL1: + case Algorithm::ReduceL1: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + (y >= 0 ? y : -y); }); break; - case ReduceL2: + case Algorithm::ReduceL2: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + y * y; }); break; - case ReduceLogSum: + case Algorithm::ReduceLogSum: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x + y; }); break; - case ReduceLogSumExp: + case Algorithm::ReduceLogSumExp: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + expf(y); }); break; - case ReduceMax: + case Algorithm::ReduceMax: reduce_ref_process(in_ptr, out_ptr, std::numeric_limits::lowest(), [](float x, float y)->float { return x > y ? x : y; }); break; - case ReduceMean: + case Algorithm::ReduceMean: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x + y; }); break; - case ReduceMin: + case Algorithm::ReduceMin: reduce_ref_process(in_ptr, out_ptr, std::numeric_limits::max(), [](float x, float y)->float { return x < y ? 
x : y; }); break; - case ReduceOr: + case Algorithm::ReduceOr: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x || y; }); break; - case ReduceProd: + case Algorithm::ReduceProd: reduce_ref_process(in_ptr, out_ptr, 1, [](float x, float y)->float { return x * y; }); break; - case ReduceSum: + case Algorithm::ReduceSum: reduce_ref_process(in_ptr, out_ptr, 0, [](float x, float y)->float { return x + y; }); break; - case ReduceSumSquare: + case Algorithm::ReduceSumSquare: reduce_ref_process(in_ptr, out_ptr, 0, [](float old, float y)->float { return old + y * y; }); break; default: @@ -2743,7 +2746,7 @@ inline void MKLDNNReduceNode::reduce_ref(const float *in_ptr, float *out_ptr) { } } -void MKLDNNReduceNode::reduce_ref_process(const float *in_ptr, float *out_ptr, float init_value, std::function func) { +void Reduce::reduce_ref_process(const float *in_ptr, float *out_ptr, float init_value, std::function func) { size_t work_amount_dst = 1, reduced_dims_work_amount = 1; for (size_t i = 0; i < process_dst_dims.size(); i++) work_amount_dst *= process_dst_dims[i]; @@ -2798,29 +2801,29 @@ void MKLDNNReduceNode::reduce_ref_process(const float *in_ptr, float *out_ptr, f reduce_ref_map(out_ptr, work_amount_dst, reduced_dims_work_amount); } -inline void MKLDNNReduceNode::reduce_ref_map(float *out_ptr, size_t work_amount_dst, size_t reduced_dims_work_amount) { +inline void Reduce::reduce_ref_map(float *out_ptr, size_t work_amount_dst, size_t reduced_dims_work_amount) { switch (algorithm) { - case ReduceAnd: - case ReduceL1: - case ReduceMax: - case ReduceMin: - case ReduceOr: - case ReduceProd: - case ReduceSum: - case ReduceSumSquare: + case Algorithm::ReduceAnd: + case Algorithm::ReduceL1: + case Algorithm::ReduceMax: + case Algorithm::ReduceMin: + case Algorithm::ReduceOr: + case Algorithm::ReduceProd: + case Algorithm::ReduceSum: + case Algorithm::ReduceSumSquare: break; - case ReduceL2: + case Algorithm::ReduceL2: parallel_for(work_amount_dst, [&](size_t i) { out_ptr[i] = std::sqrt(out_ptr[i]); }); break; - case ReduceLogSum: - case ReduceLogSumExp: + case Algorithm::ReduceLogSum: + case Algorithm::ReduceLogSumExp: parallel_for(work_amount_dst, [&](size_t i) { out_ptr[i] = logf(out_ptr[i]); }); break; - case ReduceMean: + case Algorithm::ReduceMean: parallel_for(work_amount_dst, [&](size_t i) { out_ptr[i] /= reduced_dims_work_amount; }); @@ -2830,17 +2833,17 @@ inline void MKLDNNReduceNode::reduce_ref_map(float *out_ptr, size_t work_amount_ } } -void MKLDNNReduceNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &postOpDims, bool initWeights) { +void Reduce::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &postOpDims, bool initWeights) { mkldnn::post_ops ops; postOpsDataPtrs.clear(); for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); + auto* fakeQuantizeNode = dynamic_cast(node.get()); if (fakeQuantizeNode) { fakeQuantizeNode->appendPostOps(ops, {}, postOpsDataPtrs); continue; } - auto* eltwiseNode = dynamic_cast(node.get()); + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { eltwiseNode->appendPostOps(ops, postOpDims, postOpsDataPtrs); continue; @@ -2851,7 +2854,7 @@ void MKLDNNReduceNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims attr.set_post_ops(ops); } -void MKLDNNReduceNode::setJITBeyond5D() { +void Reduce::setJITBeyond5D() { jit_beyond_5D = false; if (getInputShapeAtPort(REDUCE_DATA).getRank() > 5) { for (auto &axis : raw_axes) { @@ -2873,7 +2876,7 @@ void 
MKLDNNReduceNode::setJITBeyond5D() { } } -std::vector MKLDNNReduceNode::update_src_dims() { +std::vector Reduce::update_src_dims() { std::vector reduce_axes = raw_axes; if (reduce_axes.size() < 1) @@ -2905,7 +2908,7 @@ std::vector MKLDNNReduceNode::update_src_dims() { return reduce_axes; } -bool MKLDNNReduceNode::canApplyJIT(const Precision &input_prec, const Precision &output_prec) const { +bool Reduce::canApplyJIT(const Precision &input_prec, const Precision &output_prec) const { static const Precision supportedPrecisions[] = { Precision::FP32, Precision::BF16, @@ -2919,10 +2922,10 @@ bool MKLDNNReduceNode::canApplyJIT(const Precision &input_prec, const Precision std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), output_prec) != std::end(supportedPrecisions); } -bool MKLDNNReduceNode::canFuse(const MKLDNNNodePtr& node) const { +bool Reduce::canFuse(const NodePtr& node) const { Precision input_prec = getOriginalInputPrecisionAtPort(REDUCE_DATA); Precision output_prec = getOriginalOutputPrecisionAtPort(0); - if (!canApplyJIT(input_prec, output_prec) || jit_beyond_5D || algorithm == ReduceAnd || algorithm == ReduceOr) { + if (!canApplyJIT(input_prec, output_prec) || jit_beyond_5D || algorithm == Algorithm::ReduceAnd || algorithm == Algorithm::ReduceOr) { return false; } @@ -2930,8 +2933,8 @@ bool MKLDNNReduceNode::canFuse(const MKLDNNNodePtr& node) const { // If the post ops node has a lower precision for such modes, post ops fusing won't be supposted, in order to avoid accuracy loss. if (output_prec == Precision::FP32 && !node->getOriginalOutputPrecisions().empty() && node->getOriginalOutputPrecisionAtPort(0) != Precision::FP32) { - if (algorithm != ReduceAnd && algorithm != ReduceOr && - algorithm != ReduceMin && algorithm != ReduceMax) { + if (algorithm != Algorithm::ReduceAnd && algorithm != Algorithm::ReduceOr && + algorithm != Algorithm::ReduceMin && algorithm != Algorithm::ReduceMax) { return false; } } @@ -2939,8 +2942,10 @@ bool MKLDNNReduceNode::canFuse(const MKLDNNNodePtr& node) const { return canFuseSimpleOperation(node); } -bool MKLDNNReduceNode::created() const { - return getType() == Reduce; +bool Reduce::created() const { + return getType() == Type::Reduce; } -REG_MKLDNN_PRIM_FOR(MKLDNNReduceNode, Reduce); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reduce.h b/src/plugins/intel_cpu/src/nodes/reduce.h index 8b22b02d90d..aeaaa64b4b2 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.h +++ b/src/plugins/intel_cpu/src/nodes/reduce.h @@ -12,6 +12,7 @@ namespace ov { namespace intel_cpu { +namespace node { enum ReduceLayoutType { reduce_ncsp, @@ -82,9 +83,9 @@ struct jit_uni_reduce_post_kernel { const mkldnn_primitive_attr &attr_; }; -class MKLDNNReduceNode : public MKLDNNNode { +class Reduce : public Node { public: - MKLDNNReduceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Reduce(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -94,7 +95,7 @@ public: void execute(mkldnn::stream strm) override; std::vector shapeInfer() const override; void executeDynamicImpl(mkldnn::stream strm) override; - bool canFuse(const MKLDNNNodePtr& node) const override; + bool canFuse(const NodePtr& node) const override; bool canBeInPlace() const override { return false; } @@ -156,10 +157,11 @@ private: std::shared_ptr reduce_kernel; 
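Reduce::setPostOps and canFuse above move from MKLDNNNodePtr and the MKLDNN* casts to the NodePtr alias and the node::-qualified classes. A stub sketch of that fused post-op detection pattern; NodePtr is assumed to be a shared_ptr alias over Node, and the Eltwise/FakeQuantize classes here are empty stand-ins:

    #include <memory>
    #include <vector>

    namespace ov { namespace intel_cpu {

    class Node { public: virtual ~Node() = default; };
    using NodePtr = std::shared_ptr<Node>;             // assumed alias, was MKLDNNNodePtr

    namespace node {
    class Eltwise : public Node {};                     // stand-ins for the real node classes
    class FakeQuantize : public Node {};
    }  // namespace node

    // Mirrors the post-op walk in Reduce::setPostOps: each fused node is identified
    // by a dynamic_cast to its concrete node:: type.
    inline int countSupportedPostOps(const std::vector<NodePtr>& fusedWith) {
        int count = 0;
        for (const auto& n : fusedWith) {
            if (dynamic_cast<node::FakeQuantize*>(n.get()) || dynamic_cast<node::Eltwise*>(n.get()))
                ++count;
        }
        return count;
    }

    }}  // namespace ov::intel_cpu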
std::shared_ptr reduce_post_kernel; - static const std::map& op, MKLDNNReduceNode& node)>> initializers; + static const std::map& op, Reduce& node)>> initializers; std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index fe50f592bd9..d778f378a47 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -4,26 +4,29 @@ #include "reference.h" #include -#include +#include #include "openvino/runtime/tensor.hpp" #include "common/blocked_desc_creator.h" #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace InferenceEngine::details; -MKLDNNReferenceNode::MKLDNNReferenceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache, +namespace ov { +namespace intel_cpu { +namespace node { + +Reference::Reference(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache, const std::string& errorMessage) : - MKLDNNNode(op, eng, cache), ngraphOp(op), additionalErrorMessage(errorMessage) { + Node(op, eng, cache), ngraphOp(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { IE_THROW(NotImplemented) << "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"; } - setType(Reference); + setType(Type::Reference); setTypeStr("Reference"); - // RandomUniform should generate new sequence each run even if all inputs are constants. So that method MKLDNNNode::IsConstant() + // RandomUniform should generate new sequence each run even if all inputs are constants. So that method Node::IsConstant() // doesn't return 'True' for RandomUniform with all constant inputs and the node generates new values for each inference, // we set 'NoConst' value for 'ConstantType' in ctor if (ov::is_type(ngraphOp)) { @@ -31,9 +34,9 @@ MKLDNNReferenceNode::MKLDNNReferenceNode(const std::shared_ptr& op } } -void MKLDNNReferenceNode::getSupportedDescriptors() {} +void Reference::getSupportedDescriptors() {} -void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { +void Reference::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -52,9 +55,9 @@ void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { addSupportedPrimDesc(inputConfigurators, outputConfigurators, impl_desc_type::ref); } -void MKLDNNReferenceNode::createPrimitive() {} +void Reference::createPrimitive() {} -void MKLDNNReferenceNode::execute(mkldnn::stream strm) { +void Reference::execute(mkldnn::stream strm) { ov::TensorVector inputs; for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); @@ -74,18 +77,22 @@ void MKLDNNReferenceNode::execute(mkldnn::stream strm) { } } -std::vector MKLDNNReferenceNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(0xFFFFFFFF); +std::vector Reference::shapeInfer() const { + return Node::shapeInferGeneric(0xFFFFFFFF); } -void MKLDNNReferenceNode::executeDynamicImpl(mkldnn::stream strm) { +void Reference::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNReferenceNode::created() const { - return getType() == Reference; +bool Reference::created() const { + return getType() == Type::Reference; } -bool MKLDNNReferenceNode::needShapeInfer() const { +bool Reference::needShapeInfer() const { return true; } + +} // namespace node 
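Range, Reduce and Reference above all route dynamic shape inference through Node::shapeInferGeneric with a port mask: Range marks its start/limit/delta inputs, Reduce marks only the axes input, and Reference passes 0xFFFFFFFF so every input counts. The helper below is a local stand-in written only to make the masking idea concrete; the plugin's actual PortMask may differ, and the port numbers assume the usual 0-based input ordering:

    #include <cstdint>

    // Stand-in helper: fold a list of input-port indices into a bitmask.
    constexpr uint32_t PortMask() { return 0; }
    template <typename... Ports>
    constexpr uint32_t PortMask(int port, Ports... rest) {
        return (1u << port) | PortMask(rest...);
    }

    constexpr bool dependsOnPort(uint32_t mask, int port) { return (mask >> port) & 1u; }

    // Range: start/limit/delta inputs (assumed ports 0..2) drive the output shape.
    static_assert(PortMask(0, 1, 2) == 0x7, "three data-dependent ports");
    // Reference: 0xFFFFFFFF means every input may affect the inferred shape.
    static_assert(dependsOnPort(0xFFFFFFFFu, 31), "all ports marked");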
+} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reference.h b/src/plugins/intel_cpu/src/nodes/reference.h index 48302932dd8..117b1aff5f5 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.h +++ b/src/plugins/intel_cpu/src/nodes/reference.h @@ -8,10 +8,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNReferenceNode : public MKLDNNNode { +class Reference : public Node { public: - MKLDNNReferenceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache, const std::string& errorMessage); + Reference(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache, const std::string& errorMessage); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -29,5 +30,6 @@ private: const std::string additionalErrorMessage; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp index 197c01a6238..b7a3ed2db80 100644 --- a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp @@ -16,7 +16,6 @@ #include #include "utils/bfloat16.hpp" -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl::cpu; using namespace mkldnn::impl::cpu::x64; @@ -24,6 +23,10 @@ using namespace mkldnn::impl::utils; #define GET_OFF(field) offsetof(jit_args_logistic, field) +namespace ov { +namespace intel_cpu { +namespace node { + template struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_logistic_kernel_f32) @@ -227,7 +230,7 @@ private: } }; -bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool RegionYolo::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto regionYolo = std::dynamic_pointer_cast(op); if (!regionYolo) { @@ -240,12 +243,12 @@ bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +RegionYolo::RegionYolo(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -264,7 +267,7 @@ MKLDNNRegionYoloNode::MKLDNNRegionYoloNode(const std::shared_ptr& block_size = 1; } -void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() { +void RegionYolo::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -301,7 +304,7 @@ void MKLDNNRegionYoloNode::initSupportedPrimitiveDescriptors() { impl_type); } -void MKLDNNRegionYoloNode::createPrimitive() { +void RegionYolo::createPrimitive() { if (inputShapesDefined()) { updateLastInputDims(); } @@ -328,7 +331,7 @@ void MKLDNNRegionYoloNode::createPrimitive() { logistic_kernel->create_ker(); } -inline float MKLDNNRegionYoloNode::logistic_scalar(float src) { +inline float RegionYolo::logistic_scalar(float src) { U aux2; aux2.as_float_value = src; int sign = aux2.as_int_value >> 31; @@ -344,7 +347,7 @@ inline float MKLDNNRegionYoloNode::logistic_scalar(float src) { return src; } -inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int count, uint8_t * dst_data) { +inline void 
RegionYolo::calculate_logistic(size_t start_index, int count, uint8_t * dst_data) { auto dst_data_size = output_prec.size(); if (logistic_kernel) { int blocks_num = div_up(count, block_size); @@ -375,7 +378,7 @@ inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int cou } } -void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { +void RegionYolo::execute(mkldnn::stream strm) { const auto &inShape = getParentEdgeAt(0)->getMemory().GetShape(); const auto &inDims = inShape.getStaticDims(); size_t B = (inShape.getRank() > 0) ? inDims[0] : 1; @@ -432,8 +435,10 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { } } -bool MKLDNNRegionYoloNode::created() const { - return getType() == RegionYolo; +bool RegionYolo::created() const { + return getType() == Type::RegionYolo; } -REG_MKLDNN_PRIM_FOR(MKLDNNRegionYoloNode, RegionYolo) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.h b/src/plugins/intel_cpu/src/nodes/region_yolo.h index 0e4220bb48b..fb950a541eb 100644 --- a/src/plugins/intel_cpu/src/nodes/region_yolo.h +++ b/src/plugins/intel_cpu/src/nodes/region_yolo.h @@ -13,6 +13,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_args_logistic { const void* src; @@ -38,9 +39,9 @@ struct jit_uni_logistic_kernel { virtual ~jit_uni_logistic_kernel() {} }; -class MKLDNNRegionYoloNode : public MKLDNNNode { +class RegionYolo : public Node { public: - MKLDNNRegionYoloNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + RegionYolo(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -77,5 +78,6 @@ private: inline void calculate_logistic(size_t start_index, int count, uint8_t * dst_data); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index 73d12389821..49405f12221 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include "ie_parallel.hpp" #include "utils/general_utils.h" #include @@ -17,10 +17,13 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct ReorderKey { mkldnn::memory::desc src; mkldnn::memory::desc dest; @@ -47,26 +50,26 @@ bool ReorderKey::operator==(const ReorderKey& rhs) const { } // namespace -bool MKLDNNReorderNode::isExecutable() const { - return MKLDNNNode::isExecutable() && !isOptimized; +bool Reorder::isExecutable() const { + return Node::isExecutable() && !isOptimized; } -MKLDNNReorderNode::MKLDNNReorderNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : - MKLDNNNode(op, eng, w_cache) { +Reorder::Reorder(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache) : + Node(op, eng, w_cache) { IE_THROW() << "Can't create reorder node from ngraph node"; } -MKLDNNReorderNode::MKLDNNReorderNode(const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : - MKLDNNNode("Reorder", name, eng, w_cache) {} +Reorder::Reorder(const std::string& name, const mkldnn::engine& eng, WeightsSharing::Ptr &w_cache) : + Node("Reorder", name, eng, w_cache) {} 
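The ReorderKey introduced above pairs the source and destination memory descriptors with hash() and operator== so that compiled reorder primitives can be fetched from the runtime cache via getOrCreate. A generic sketch of that key-plus-builder caching shape using only the standard library; the descriptor type and the cache itself are simplified stand-ins for mkldnn::memory::desc and the plugin's multi-cache:

    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_map>

    struct DescStub { std::string layout; };            // stands in for mkldnn::memory::desc

    struct ReorderKeyStub {
        DescStub src, dst;
        size_t hash() const {
            std::hash<std::string> h;
            return h(src.layout) * 31 + h(dst.layout);
        }
        bool operator==(const ReorderKeyStub& rhs) const {
            return src.layout == rhs.src.layout && dst.layout == rhs.dst.layout;
        }
    };

    struct KeyHasher { size_t operator()(const ReorderKeyStub& k) const { return k.hash(); } };

    // Minimal getOrCreate: build the value only when the key is not cached yet.
    template <typename Value, typename Builder>
    Value& getOrCreate(std::unordered_map<ReorderKeyStub, Value, KeyHasher>& cache,
                       const ReorderKeyStub& key, Builder build) {
        auto it = cache.find(key);
        if (it == cache.end())
            it = cache.emplace(key, build(key)).first;
        return it->second;
    }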
-void MKLDNNReorderNode::getSupportedDescriptors() { +void Reorder::getSupportedDescriptors() { if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); } -void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { +void Reorder::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -105,7 +108,7 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { IE_THROW() << "Reorder node doesn't support case when input and output shapes have different rank and dynamic"; if (!isOptimized) { const auto &inShape = getInputShapeAtPort(0); - if (ov::intel_cpu::one_of(inShape.getRank(), 4, 5) && + if (one_of(inShape.getRank(), 4, 5) && config.inConfs[0].getMemDesc()->hasLayoutType(LayoutType::nspc) && config.outConfs[0].getMemDesc()->hasLayoutType(LayoutType::ncsp) && config.inConfs[0].getMemDesc()->getPrecision() == Precision::FP32 && @@ -113,7 +116,7 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation isNspc2NcspCase = true; } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) && - ov::intel_cpu::one_of(inShape.getRank(), 4, 5) && + one_of(inShape.getRank(), 4, 5) && config.inConfs[0].getMemDesc()->hasLayoutType(LayoutType::ncsp) && config.outConfs[0].getMemDesc()->hasLayoutType(LayoutType::nspc) && config.inConfs[0].getMemDesc()->getPrecision() == config.outConfs[0].getMemDesc()->getPrecision() && @@ -124,7 +127,7 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNReorderNode::createPrimitive() { +void Reorder::createPrimitive() { if (inputShapesDefined()) { if (needPrepareParams()) prepareParams(); @@ -132,11 +135,11 @@ void MKLDNNReorderNode::createPrimitive() { } } -void MKLDNNReorderNode::executeDynamicImpl(mkldnn::stream strm) { +void Reorder::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNReorderNode::prepareParams() { +void Reorder::prepareParams() { if (!isOptimized) { auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); @@ -200,7 +203,7 @@ void MKLDNNReorderNode::prepareParams() { } } -void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc& srcDesc, +void Reorder::createReorderPrimitive(const mkldnn::memory::desc& srcDesc, void* srcPtr, const mkldnn::memory::desc& dstDesc, void* dstPtr) { @@ -209,11 +212,11 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc& srcDe IE_THROW() << "Preferable primitive descriptor is not set."; const auto engine = getEngine(); - src_blocked = std::make_shared(engine); - src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); + src_blocked = std::make_shared(engine); + src_blocked->Create(DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); - dst_blocked = std::make_shared(engine); - dst_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); + dst_blocked = std::make_shared(engine); + dst_blocked->Create(DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); impl_desc_type impl_type = selectedPD->getImplementationType(); ReorderKey key = {src_blocked->GetPrimitive().get_desc(), dst_blocked->GetPrimitive().get_desc()}; @@ -240,19 +243,19 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc& srcDe // 
split group dimension in separate shape dimension. IE use OIHW, but mkldnn expect GOIHW. // So we will perform implicit reshape to dst shape. // - // MKLDNN doesn't support direct reorders for tensors of different rank. The code below tries to + // oneDNN doesn't support direct reorders for tensors of different rank. The code below tries to // perform such conversion if the source tensor can be reshaped to the destination rank. This is // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) if (src_blocked->getDesc().hasLayoutType(LayoutType::ncsp) && src_blocked->GetShape().getRank() != dst_blocked->GetShape().getRank()) { const auto newDims = dst_blocked->getStaticDims(); - const auto newFormat = MKLDNNExtensionUtils::GetPlainFormatByRank(newDims.size()); + const auto newFormat = DnnlExtensionUtils::GetPlainFormatByRank(newDims.size()); - auto newDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(newDims), + auto newDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), src_blocked->GetDataType(), newFormat); - src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(newDesc), srcPtr, false); + src_blocked->Create(DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); key.src = src_blocked->GetPrimitive().get_desc(); result = cache->getOrCreate(key, builder); @@ -270,16 +273,16 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc& srcDe primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}}; } -const std::vector& MKLDNNReorderNode::getPrimitivesPriority() { +const std::vector& Reorder::getPrimitivesPriority() { implPriorities = {impl_desc_type::reorder}; return implPriorities; } -bool MKLDNNReorderNode::created() const { - return getType() == Reorder; +bool Reorder::created() const { + return getType() == Type::Reorder; } -void MKLDNNReorderNode::optimizedNcsp2Nspc() { +void Reorder::optimizedNcsp2Nspc() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); @@ -313,7 +316,7 @@ void MKLDNNReorderNode::optimizedNcsp2Nspc() { }); } -void MKLDNNReorderNode::optimizedNspc2Ncsp() { +void Reorder::optimizedNspc2Ncsp() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); @@ -343,7 +346,7 @@ void MKLDNNReorderNode::optimizedNspc2Ncsp() { }); } -void MKLDNNReorderNode::execute(mkldnn::stream strm) { +void Reorder::execute(mkldnn::stream strm) { if (isOptimized) return; @@ -355,11 +358,11 @@ void MKLDNNReorderNode::execute(mkldnn::stream strm) { src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); - MKLDNNNode::execute(strm); + Node::execute(strm); } } -void MKLDNNReorderNode::setDynamicBatchLim(int lim) { +void Reorder::setDynamicBatchLim(int lim) { dynBatchLim = lim; if (prim) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); @@ -379,7 +382,7 @@ void MKLDNNReorderNode::setDynamicBatchLim(int lim) { } } -std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc) { +std::string Reorder::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc) { std::string inArgs, outArgs; if (parentDesc.getPrecision() != childDesc.getPrecision()) { inArgs += (inArgs.empty() ? 
"" : "_") + std::string(parentDesc.getPrecision().name()); @@ -394,7 +397,7 @@ std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, cons return inArgs + "_" + outArgs; } -void MKLDNNReorderNode::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output) { +void Reorder::reorderData(const Memory &input, const Memory &output) { if (!input.getDesc().isDefined() || !output.getDesc().isDefined()) IE_THROW() << "Can't reorder data with dynamic shapes"; @@ -418,18 +421,18 @@ void MKLDNNReorderNode::reorderData(const MKLDNNMemory &input, const MKLDNNMemor srcMemory = input.GetPrimitive(); } catch (const mkldnn::error& err) { - if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType() && MKLDNNConvertNode::isSupportedDesc(input.getDesc()) && - MKLDNNConvertNode::isSupportedDesc(output.getDesc())) { + if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType() && Convert::isSupportedDesc(input.getDesc()) && + Convert::isSupportedDesc(output.getDesc())) { //we probably could not make the reorder because there is no one supporting this precision conversion //lets try to convert data first using cpu_convert auto data = static_cast(input.GetPtr()); tmpBuff.resize(input.GetSize()); - const auto outPrc = MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()); - cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), + const auto outPrc = DnnlExtensionUtils::DataTypeToIEPrecision(output.GetDataType()); + cpu_convert(data, tmpBuff.data(), DnnlExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), outPrc, input.GetSize() / input.getDesc().getPrecision().size()); - MKLDNNMemory tmpMem(output.getEngine()); + Memory tmpMem(output.getEngine()); auto tmpDesc = input.getDesc().cloneWithNewPrecision(outPrc); tmpMem.Create(std::move(tmpDesc), tmpBuff.data()); @@ -449,8 +452,10 @@ void MKLDNNReorderNode::reorderData(const MKLDNNMemory &input, const MKLDNNMemor } } -std::vector MKLDNNReorderNode::shapeInfer() const { +std::vector Reorder::shapeInfer() const { return {getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()}; } -REG_MKLDNN_PRIM_FOR(MKLDNNReorderNode, Reorder); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index b196bf5ddb9..3ba2ccfda29 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -13,11 +13,12 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNReorderNode : public MKLDNNNode { +class Reorder : public Node { public: - MKLDNNReorderNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - MKLDNNReorderNode(const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Reorder(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + Reorder(const std::string& name, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -60,14 +61,14 @@ public: static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); - static void reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output); + static void reorderData(const Memory &input, const Memory &output); private: std::shared_ptr input; std::shared_ptr output; - MKLDNNMemoryPtr dst_blocked; - 
MKLDNNMemoryPtr src_blocked; + MemoryPtr dst_blocked; + MemoryPtr src_blocked; bool isOptimized = false; @@ -81,5 +82,6 @@ private: void createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp index 2662ddf18da..7698af2e2f3 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp @@ -8,10 +8,13 @@ #include "ie_parallel.hpp" #include "reorg_yolo.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNReorgYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool ReorgYolo::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto reorgYolo = std::dynamic_pointer_cast(op); if (!reorgYolo) { @@ -24,8 +27,8 @@ bool MKLDNNReorgYoloNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ReorgYolo::ReorgYolo(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -42,7 +45,7 @@ MKLDNNReorgYoloNode::MKLDNNReorgYoloNode(const std::shared_ptr& op stride = strides[0]; } -void MKLDNNReorgYoloNode::initSupportedPrimitiveDescriptors() { +void ReorgYolo::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -51,11 +54,11 @@ void MKLDNNReorgYoloNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNReorgYoloNode::executeDynamicImpl(mkldnn::stream strm) { +void ReorgYolo::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNReorgYoloNode::execute(mkldnn::stream strm) { +void ReorgYolo::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); @@ -89,8 +92,10 @@ void MKLDNNReorgYoloNode::execute(mkldnn::stream strm) { } } -bool MKLDNNReorgYoloNode::created() const { - return getType() == ReorgYolo; +bool ReorgYolo::created() const { + return getType() == Type::ReorgYolo; } -REG_MKLDNN_PRIM_FOR(MKLDNNReorgYoloNode, ReorgYolo) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.h b/src/plugins/intel_cpu/src/nodes/reorg_yolo.h index 6bbb5ac4918..eb36b2d14c2 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.h +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNReorgYoloNode : public MKLDNNNode { +class ReorgYolo : public Node { public: - MKLDNNReorgYoloNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ReorgYolo(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -29,5 +30,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git 
a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index b37d82b12cc..9eadf1e5e40 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -5,7 +5,7 @@ #include "reshape.h" #include #include -#include +#include #include #include #include @@ -14,10 +14,13 @@ #include "common/cpu_memcpy.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNReshapeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Reshape::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!std::dynamic_pointer_cast(op) && !std::dynamic_pointer_cast(op) && @@ -31,8 +34,8 @@ bool MKLDNNReshapeNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +Reshape::Reshape(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -61,7 +64,7 @@ MKLDNNReshapeNode::MKLDNNReshapeNode(const std::shared_ptr& op, co } } -bool MKLDNNReshapeNode::needShapeInfer() const { +bool Reshape::needShapeInfer() const { if (inputShapesModified()) { return true; } @@ -75,7 +78,7 @@ bool MKLDNNReshapeNode::needShapeInfer() const { return false; } -std::vector MKLDNNReshapeNode::shapeInfer() const { +std::vector Reshape::shapeInfer() const { const auto &memPtr = getParentEdgesAtPort(1)[0]->getMemory(); const int32_t *sndInput = reinterpret_cast(memPtr.GetPtr()); @@ -88,14 +91,14 @@ std::vector MKLDNNReshapeNode::shapeInfer() const { return shapeInferGeneric(PortMask(1)); } -void MKLDNNReshapeNode::getSupportedDescriptors() { +void Reshape::getSupportedDescriptors() { if (getParentEdges().size() != 1 && getParentEdges().size() != 2) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); } -void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { +void Reshape::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -124,11 +127,14 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } -void MKLDNNReshapeNode::executeDynamicImpl(mkldnn::stream strm) { +void Reshape::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNReshapeNode::created() const { - return getType() == Reshape; +bool Reshape::created() const { + return getType() == Type::Reshape; } -REG_MKLDNN_PRIM_FOR(MKLDNNReshapeNode, Reshape); + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reshape.h b/src/plugins/intel_cpu/src/nodes/reshape.h index b717e8d55dd..f02372994bd 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.h +++ b/src/plugins/intel_cpu/src/nodes/reshape.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNReshapeNode : public MKLDNNNode { +class Reshape : public Node { public: - MKLDNNReshapeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Reshape(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr 
&cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -38,5 +39,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index cd95e28cf97..b81958862cb 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -9,10 +9,13 @@ #include "ie_parallel.hpp" #include "reverse_sequence.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNReverseSequenceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool ReverseSequence::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -29,8 +32,8 @@ bool MKLDNNReverseSequenceNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ReverseSequence::ReverseSequence(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -82,7 +85,7 @@ MKLDNNReverseSequenceNode::MKLDNNReverseSequenceNode(const std::shared_ptr(getParentEdgeAt(REVERSESEQUENCE_DATA)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); @@ -180,8 +183,10 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { } } -bool MKLDNNReverseSequenceNode::created() const { - return getType() == ReverseSequence; +bool ReverseSequence::created() const { + return getType() == Type::ReverseSequence; } -REG_MKLDNN_PRIM_FOR(MKLDNNReverseSequenceNode, ReverseSequence) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h index aba32eeb70f..4aa1f35c411 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h @@ -9,10 +9,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNReverseSequenceNode : public MKLDNNNode { +class ReverseSequence : public Node { public: - MKLDNNReverseSequenceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ReverseSequence(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -36,5 +37,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 08fc91c0ae1..e1d9f27c57b 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -8,7 +8,7 @@ #include "nodes/common/cpu_convert.h" #include "utils/bfloat16.hpp" #include "input.h" -#include +#include #include "memory_desc/dnnl_blocked_memory_desc.h" #include @@ -24,6 +24,7 @@ using namespace InferenceEngine; namespace ov { namespace intel_cpu { +namespace node { static rnn_direction ieDirection2dnnl(const 
std::shared_ptr& op) { ov::op::RecurrentSequenceDirection direction = ov::op::RecurrentSequenceDirection::FORWARD; @@ -103,7 +104,7 @@ inline bool haveCellState(const mkldnn::algorithm& alg) { return alg == mkldnn::algorithm::vanilla_lstm; } -const std::map MKLDNNRNN::weightsByLayerPrec { +const std::map RNN::weightsByLayerPrec { // layer precision, weights precision {Precision::FP32, Precision::FP32}, {Precision::BF16, Precision::BF16}, @@ -167,7 +168,7 @@ bool RNNKey::operator==(const RNNKey& rhs) const { return true; } -bool MKLDNNRNN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool RNN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v3::GRUCell::get_type_info_static(), @@ -244,21 +245,21 @@ bool MKLDNNRNN::isSupportedOperation(const std::shared_ptr& op, return true; } -MKLDNNRNN::MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +RNN::RNN(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } internalBlobDesc.emplace_back([&](primitive_desc_iterator& primitive_desc_it, size_t idx) -> DnnlMemoryDescPtr { - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0)); }); internalBlobDesc.emplace_back([&](primitive_desc_iterator& primitive_desc_it, size_t idx) -> DnnlMemoryDescPtr { - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(1)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(1)); }); internalBlobDesc.emplace_back([&](primitive_desc_iterator& primitive_desc_it, size_t idx) -> DnnlMemoryDescPtr { - return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(2)); + return DnnlExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(2)); }); is_cell = one_of(op->get_type_info(), @@ -312,18 +313,18 @@ MKLDNNRNN::MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& } } -bool MKLDNNRNN::created() const { - return getType() == (is_cell ? RNNCell : RNNSeq); +bool RNN::created() const { + return getType() == (is_cell ? Type::RNNCell : Type::RNNSeq); } -void MKLDNNRNN::getSupportedDescriptors() { +void RNN::getSupportedDescriptors() { if (is_cell) fillCellDesc(); else fillSequenceDesc(); } -void MKLDNNRNN::initCell() { +void RNN::initCell() { if (getInputShapeAtPort(0).getRank() != 2lu || getInputShapeAtPort(1).getRank() != 2lu) THROW_ERROR << "has incorrect input ranks. 
Data rank: " << getInputShapeAtPort(0).getRank() << "; Hidden state rank: " << getInputShapeAtPort(1).getRank(); @@ -351,8 +352,8 @@ void MKLDNNRNN::initCell() { } } -void MKLDNNRNN::fillCellDesc() { - const auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); +void RNN::fillCellDesc() { + const auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); const Shape shapeS_4D = MemoryDescUtils::makeDummyShape({{L, D, N.minVal, SC}, {L, D, N.maxVal, SC}}), inShape = MemoryDescUtils::makeDummyShape({{T.minVal, N.minVal, DC}, {T.maxVal, N.maxVal, DC}}), outShape = MemoryDescUtils::makeDummyShape({{T.minVal, N.minVal, SC}, {T.maxVal, N.maxVal, SC}}); @@ -397,7 +398,7 @@ void MKLDNNRNN::fillCellDesc() { createDescriptor(inCandidate, outCandidate); } -void MKLDNNRNN::initSequence() { +void RNN::initSequence() { const auto& inDataShape = getInputShapeAtPort(0); const auto& outDataShape = getOutputShapeAtPort(0); @@ -421,8 +422,8 @@ void MKLDNNRNN::initSequence() { outDataDescs.reserve(S + 1); } -void MKLDNNRNN::fillSequenceDesc() { - const auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); +void RNN::fillSequenceDesc() { + const auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); const Shape shapeS_4D = MemoryDescUtils::makeDummyShape({{L, D, N.minVal, SC}, {L, D, N.maxVal, SC}}), inShape = MemoryDescUtils::makeDummyShape({{T.minVal, N.minVal, DC}, {T.maxVal, N.maxVal, DC}}), outShape = MemoryDescUtils::makeDummyShape({{T.minVal, N.minVal, SC}, {T.maxVal, N.maxVal, SC}}), @@ -505,14 +506,14 @@ void MKLDNNRNN::fillSequenceDesc() { createDescriptor(inCandidate, outCandidate); } -bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { +bool RNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { if (!weightsByLayerPrec.count(layerPrec)) THROW_ERROR << "has unsupported layer precision " << layerPrec; return weightsPrec == weightsByLayerPrec.at(layerPrec); } template -void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx) { +void RNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx) { const auto& dataPrecision = getOriginalInputPrecisionAtPort(0); const auto& weightPrec = getOriginalInputPrecisionAtPort(wIdx); if (!verifyWeightsPrecision(dataPrecision, weightPrec) && dataPrecision != Precision::BF16 && weightPrec != Precision::FP32) { @@ -538,10 +539,10 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t const size_t ie_w_vec_size = getInputShapeAtPort(wIdx).getElementsCount(); const size_t ie_r_vec_size = getInputShapeAtPort(rIdx).getElementsCount(); - auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); + auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); auto wConstBlob = wInputNode->getMemoryPtr(); - auto *rInputNode = dynamic_cast(getParentEdgesAtPort(rIdx)[0]->getParent().get()); + auto *rInputNode = dynamic_cast(getParentEdgesAtPort(rIdx)[0]->getParent().get()); auto rConstBlob = rInputNode->getMemoryPtr(); std::vector ie_w_vec(ie_w_vec_size), ie_r_vec(ie_r_vec_size); @@ -576,7 +577,7 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t } template -void MKLDNNRNN::fillBiases(const int *gate_map) { +void RNN::fillBiases(const int *gate_map) { using dataType = typename 
PrecisionTrait::value_type; if (getOriginalInputPrecisionAtPort(bIdx) != Precision::FP32) { @@ -591,14 +592,14 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { if (b_ptr == nullptr) IE_THROW(NotAllocated) << "Internal blob was not allocated for node " << getName() << "."; - auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); + auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); auto constBlob = constInputNode->getMemoryPtr(); auto const elementsCount = constBlob->GetSize() / constBlob->getDesc().getPrecision().size(); std::vector ie_b_vec(elementsCount); cpu_convert(constBlob->GetPtr(), &ie_b_vec[0], - MKLDNNExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), + DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), Prec, elementsCount); @@ -610,7 +611,7 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { internalBlobs.push_back(w_bias_data_mem); } -void MKLDNNRNN::copyWeightsData() { +void RNN::copyWeightsData() { /* Copy Weight data * IE format: * W - [gates, out_state_size, in_data_size] @@ -680,12 +681,12 @@ void MKLDNNRNN::copyWeightsData() { fillBiases(gate_map); } -void MKLDNNRNN::fillDescs() { +void RNN::fillDescs() { descs.clear(); switch (cell_type) { case mkldnn::algorithm::vanilla_rnn: { - MKLDNNDescriptor desc(std::make_shared( + DnnlDesriptor desc(std::make_shared( prop_kind::forward_scoring, cell_act, direction, @@ -699,7 +700,7 @@ void MKLDNNRNN::fillDescs() { descs.push_back(desc); } break; case mkldnn::algorithm::vanilla_gru: { - MKLDNNDescriptor desc(std::make_shared( + DnnlDesriptor desc(std::make_shared( prop_kind::forward_scoring, direction, /* In Data */ inDataDescs[RNNInOutKind::Layer]->getDnnlDesc(), @@ -712,7 +713,7 @@ void MKLDNNRNN::fillDescs() { descs.push_back(desc); } break; case mkldnn::algorithm::lbr_gru: { - MKLDNNDescriptor desc(std::make_shared( + DnnlDesriptor desc(std::make_shared( prop_kind::forward_scoring, direction, /* In Data */ inDataDescs[RNNInOutKind::Layer]->getDnnlDesc(), @@ -725,7 +726,7 @@ void MKLDNNRNN::fillDescs() { descs.push_back(desc); } break; case mkldnn::algorithm::vanilla_lstm: { - MKLDNNDescriptor desc(std::make_shared( + DnnlDesriptor desc(std::make_shared( prop_kind::forward_scoring, direction, /* In Data */ inDataDescs[RNNInOutKind::Layer]->getDnnlDesc(), @@ -744,17 +745,17 @@ void MKLDNNRNN::fillDescs() { } } -void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, +void RNN::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { if (descs.empty()) { wDescs.resize(3); const auto& dataPrecision = getOriginalInputPrecisionAtPort(0); - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrecision); - auto weightsDims = MKLDNNExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC }); + auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(dataPrecision); + auto weightsDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC }); wDescs[0] = mkldnn::memory::desc(weightsDims, dataType, wFormat); - auto statesDims = MKLDNNExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC }); + auto statesDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC }); wDescs[1] = mkldnn::memory::desc(statesDims, dataType, wFormat); - auto biasDims = MKLDNNExtensionUtils::convertToDnnlDims(VectorDims{ L, D, Gb, SC }); + auto biasDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, Gb, SC }); wDescs[2] = mkldnn::memory::desc(biasDims, 
memory::data_type::f32, memory::format_tag::ldgo); fillDescs(); @@ -782,7 +783,7 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, supportedPrimitiveDescriptors.emplace_back(config, ref_any); } -void MKLDNNRNN::prepareParams() { +void RNN::prepareParams() { for (size_t i = 0; i < wIdx; i++) { auto memPtr = getParentEdgesAtPort(i).front()->getMemoryPtr(); if (!memPtr || !memPtr->isAllocated()) @@ -790,7 +791,7 @@ void MKLDNNRNN::prepareParams() { } const auto& dataPrecision = getOriginalInputPrecisionAtPort(0); - const auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrecision); + const auto dataType = DnnlExtensionUtils::IEPrecisionToDataType(dataPrecision); auto dataMemPtr = getParentEdgesAtPort(0).front()->getMemoryPtr(); const size_t B = dataMemPtr->GetShape().getStaticDims()[0]; @@ -820,9 +821,9 @@ void MKLDNNRNN::prepareParams() { wFormatWasChanged = true; } if (wFormatWasChanged) { - auto weightsDims = MKLDNNExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC }); + auto weightsDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC }); wDescs[0] = mkldnn::memory::desc(weightsDims, dataType, wFormat); - auto statesDims = MKLDNNExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC }); + auto statesDims = DnnlExtensionUtils::convertToDnnlDims(VectorDims{ L, D, SC, G, SC }); wDescs[1] = mkldnn::memory::desc(statesDims, dataType, wFormat); } @@ -864,15 +865,15 @@ void MKLDNNRNN::prepareParams() { } } -std::shared_ptr MKLDNNRNN::getSrcMemDesc(mkldnn::primitive_desc_iterator& primitive_desc_it, size_t idx) { +std::shared_ptr RNN::getSrcMemDesc(mkldnn::primitive_desc_iterator& primitive_desc_it, size_t idx) { return supportedPrimitiveDescriptors[0].getConfig().inConfs[idx].getMemDesc(); } -std::shared_ptr MKLDNNRNN::getDstMemDesc(mkldnn::primitive_desc_iterator& primitive_desc_it, size_t idx) { +std::shared_ptr RNN::getDstMemDesc(mkldnn::primitive_desc_iterator& primitive_desc_it, size_t idx) { return supportedPrimitiveDescriptors[0].getConfig().outConfs[idx].getMemDesc(); } -void MKLDNNRNN::execute(mkldnn::stream strm) { +void RNN::execute(mkldnn::stream strm) { if (!prim) THROW_ERROR << "does not have initialized primitive to execute."; @@ -913,25 +914,25 @@ void MKLDNNRNN::execute(mkldnn::stream strm) { (*prim).execute(strm, args); } -void MKLDNNRNN::executeDynamicImpl(mkldnn::stream strm) { +void RNN::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -std::vector MKLDNNRNN::shapeInfer() const { +std::vector RNN::shapeInfer() const { if ((is_cell && DC != getParentEdgesAtPort(0)[0]->getMemory().getDesc().getShape().getStaticDims()[1]) || (!is_cell && DC != getParentEdgesAtPort(0)[0]->getMemory().getDesc().getShape().getStaticDims()[2])) THROW_ERROR << "has incorrect input size value in the first input."; - auto originOutputShapes = MKLDNNNode::shapeInfer(); + auto originOutputShapes = Node::shapeInfer(); // Graph optimizer makes the same optimization. So this is required to make shapes compatible. 
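Context for the shapeInfer() hunk below: for a single-direction RNNSequence the graph optimizer squeezes the num_directions axis out of the first output, so the node's inferred shape must be squeezed the same way to stay compatible. A small stand-alone illustration of that squeeze on made-up dims (assumed example values, not part of the patch):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
        // Hypothetical RNNSequence output dims: {batch, num_directions, seq_len, hidden_size}.
        std::vector<std::size_t> dims{8, 1, 25, 128};
        // Same condition the node checks: rank 4 with a trivial num_directions axis.
        if (dims.size() == 4 && dims[1] == 1) {
            dims.erase(dims.begin() + 1);  // -> {8, 25, 128}
        }
        assert((dims == std::vector<std::size_t>{8, 25, 128}));
        return 0;
    }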
- if (getType() == RNNSeq && originOutputShapes[0].size() == 4lu && originOutputShapes[0][1] == 1lu) { + if (getType() == Type::RNNSeq && originOutputShapes[0].size() == 4lu && originOutputShapes[0][1] == 1lu) { originOutputShapes[0].erase(originOutputShapes[0].begin() + 1); } return originOutputShapes; } -void MKLDNNRNN::cleanup() { +void RNN::cleanup() { if (!isDynamicNode()) { internalBlobs.clear(); } @@ -945,8 +946,6 @@ void MKLDNNRNN::cleanup() { } } +} // namespace node } // namespace intel_cpu } // namespace ov - -REG_MKLDNN_PRIM_FOR(MKLDNNRNN, RNNCell); -REG_MKLDNN_PRIM_FOR(MKLDNNRNN, RNNSeq); diff --git a/src/plugins/intel_cpu/src/nodes/rnn.h b/src/plugins/intel_cpu/src/nodes/rnn.h index 23c2a486862..810fae5aa47 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.h +++ b/src/plugins/intel_cpu/src/nodes/rnn.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNRNN : public MKLDNNNode { +class RNN : public Node { public: - MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + RNN(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -121,5 +122,6 @@ private: bool wasMemoryPrepared = false; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index a95dbd1f695..9c7da434433 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -18,7 +18,6 @@ #include #include "emitters/jit_load_store_emitters.hpp" -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; @@ -27,6 +26,10 @@ using namespace mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; using namespace Xbyak; +namespace ov { +namespace intel_cpu { +namespace node { + using ngPoolingMode = ngraph::op::v3::ROIAlign::PoolingMode; #define GET_OFF(field) offsetof(jit_roi_align_call_args, field) @@ -629,7 +632,7 @@ private: } }; -bool MKLDNNROIAlignNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool ROIAlign::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto roiAlign = ngraph::as_type_ptr(op); if (!roiAlign) { @@ -648,8 +651,8 @@ bool MKLDNNROIAlignNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ROIAlign::ROIAlign(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "ROIPooling layer with name '" + getName() + "' "; @@ -670,7 +673,7 @@ MKLDNNROIAlignNode::MKLDNNROIAlignNode(const std::shared_ptr& op, } } -void MKLDNNROIAlignNode::getSupportedDescriptors() { +void ROIAlign::getSupportedDescriptors() { if (!descs.empty()) return; @@ -707,7 +710,7 @@ void MKLDNNROIAlignNode::getSupportedDescriptors() { } } -void MKLDNNROIAlignNode::createJitKernel(const InferenceEngine::Precision& dataPrec, const ROIAlignLayoutType& selectLayout) { +void ROIAlign::createJitKernel(const InferenceEngine::Precision& dataPrec, const ROIAlignLayoutType& 
selectLayout) { auto jcp = jit_roi_align_params(); jcp.alg = algorithm; jcp.data_prc = dataPrec; @@ -728,7 +731,7 @@ void MKLDNNROIAlignNode::createJitKernel(const InferenceEngine::Precision& dataP roi_align_kernel->create_ker(); } -void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { +void ROIAlign::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -781,7 +784,7 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNROIAlignNode::createPrimitive() { +void ROIAlign::createPrimitive() { auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -804,12 +807,12 @@ void MKLDNNROIAlignNode::createPrimitive() { namespace { struct ROIAlignContext { - MKLDNNROIAlignNode &node; + ROIAlign &node; }; } template -struct MKLDNNROIAlignNode::ROIAlignExecute { +struct ROIAlign::ROIAlignExecute { using srcT = typename std::tuple_element<0, T>::type; using dstT = typename std::tuple_element<1, T>::type; @@ -817,7 +820,7 @@ struct MKLDNNROIAlignNode::ROIAlignExecute { ctx.node.executeSpecified(); } }; -void MKLDNNROIAlignNode::execute(mkldnn::stream strm) { +void ROIAlign::execute(mkldnn::stream strm) { auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType(); auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType(); if (!((inputPrec == mkldnn_bf16 && outputPrec == mkldnn_bf16) || @@ -834,7 +837,7 @@ void MKLDNNROIAlignNode::execute(mkldnn::stream strm) { } template -void MKLDNNROIAlignNode::executeSpecified() { +void ROIAlign::executeSpecified() { auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); auto &dstMemory = getChildEdgeAt(0)->getMemory(); @@ -1103,16 +1106,18 @@ void MKLDNNROIAlignNode::executeSpecified() { } } -bool MKLDNNROIAlignNode::created() const { - return getType() == ROIAlign; +bool ROIAlign::created() const { + return getType() == Type::ROIAlign; } -bool MKLDNNROIAlignNode::needPrepareParams() const { +bool ROIAlign::needPrepareParams() const { return false; } -void MKLDNNROIAlignNode::executeDynamicImpl(mkldnn::stream strm) { +void ROIAlign::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -REG_MKLDNN_PRIM_FOR(MKLDNNROIAlignNode, ROIAlign) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.h b/src/plugins/intel_cpu/src/nodes/roi_align.h index e811675de2b..60b71d9ace1 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.h +++ b/src/plugins/intel_cpu/src/nodes/roi_align.h @@ -9,10 +9,11 @@ #include #include #include -#include +#include namespace ov { namespace intel_cpu { +namespace node { enum ROIAlignLayoutType { ncsp, @@ -58,9 +59,9 @@ struct jit_uni_roi_align_kernel { jit_roi_align_params jcp_; }; -class MKLDNNROIAlignNode : public MKLDNNNode { +class ROIAlign : public Node { public: - MKLDNNROIAlignNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ROIAlign(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -89,5 +90,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 5db94de3f15..16e35fe2bf7 100644 --- 
a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -5,7 +5,7 @@ #include "roi_pooling.h" #include -#include +#include #include #include @@ -23,7 +23,6 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; @@ -33,6 +32,10 @@ using namespace Xbyak; #define GET_OFF(field) offsetof(jit_roi_pooling_call_args, field) +namespace ov { +namespace intel_cpu { +namespace node { + template struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, public jit_generator { DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_roi_pooling_kernel_f32); @@ -345,7 +348,7 @@ bool RoiPoolingKey::operator==(const RoiPoolingKey &rhs) const { } } // namespace -bool ov::intel_cpu::jit_roi_pooling_params::operator==(const ov::intel_cpu::jit_roi_pooling_params &rhs) const noexcept { +bool jit_roi_pooling_params::operator==(const jit_roi_pooling_params &rhs) const noexcept { return mb == rhs.mb && c == rhs.c && ih == rhs.ih && @@ -363,7 +366,7 @@ bool ov::intel_cpu::jit_roi_pooling_params::operator==(const ov::intel_cpu::jit_ alg == rhs.alg; } -bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool ROIPooling::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto roiPooling = ngraph::as_type_ptr(op); if (!roiPooling) { @@ -381,8 +384,8 @@ bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ROIPooling::ROIPooling(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -402,7 +405,7 @@ MKLDNNROIPoolingNode::MKLDNNROIPoolingNode(const std::shared_ptr& } } -void MKLDNNROIPoolingNode::getSupportedDescriptors() { +void ROIPooling::getSupportedDescriptors() { if (!descs.empty()) return; @@ -429,7 +432,7 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() { } } -void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { +void ROIPooling::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -458,7 +461,7 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type); } -void MKLDNNROIPoolingNode::createPrimitive() { +void ROIPooling::createPrimitive() { auto selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors."; @@ -478,7 +481,7 @@ void MKLDNNROIPoolingNode::createPrimitive() { } } -void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { +void ROIPooling::execute(mkldnn::stream strm) { if (execPtr) { const auto &srcMemory0 = getParentEdgeAt(0)->getMemory(); const auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); @@ -489,11 +492,11 @@ void MKLDNNROIPoolingNode::execute(mkldnn::stream strm) { } } -void MKLDNNROIPoolingNode::executeDynamicImpl(mkldnn::stream strm) { +void ROIPooling::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNROIPoolingNode::prepareParams() { +void ROIPooling::prepareParams() { const auto& srcMemPtr0 = getParentEdgeAt(0)->getMemoryPtr(); const auto& srcMemPtr1 = getParentEdgeAt(0)->getMemoryPtr(); const auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); @@ -527,7 +530,7 @@ 
void MKLDNNROIPoolingNode::prepareParams() { } template -class MKLDNNROIPoolingNode::ROIPoolingJitExecutor : public MKLDNNROIPoolingNode::ROIPoolingExecutor { +class ROIPooling::ROIPoolingJitExecutor : public ROIPooling::ROIPoolingExecutor { public: ROIPoolingJitExecutor(const jit_roi_pooling_params &jpp) { if (mayiuse(cpu::x64::avx512_common)) { @@ -545,9 +548,9 @@ public: } void exec( - const ov::intel_cpu::MKLDNNMemory& srcData, - const ov::intel_cpu::MKLDNNMemory& srcRoi, - const ov::intel_cpu::MKLDNNMemory& dst) override { + const Memory& srcData, + const Memory& srcRoi, + const Memory& dst) override { if (!roi_pooling_kernel) IE_THROW() << "Could not execute. Kernel for RoiPooling node was not compiled."; @@ -664,13 +667,13 @@ private: }; template -class MKLDNNROIPoolingNode::ROIPoolingRefExecutor : public MKLDNNROIPoolingNode::ROIPoolingExecutor { +class ROIPooling::ROIPoolingRefExecutor : public ROIPooling::ROIPoolingExecutor { public: ROIPoolingRefExecutor(const jit_roi_pooling_params &_jpp) : jpp(_jpp) {} void exec( - const ov::intel_cpu::MKLDNNMemory& srcData, - const ov::intel_cpu::MKLDNNMemory& srcRoi, - const ov::intel_cpu::MKLDNNMemory& dst) override { + const Memory& srcData, + const Memory& srcRoi, + const Memory& dst) override { auto src_strides = srcData.GetDescWithType()->getStrides(); auto src_roi_step = srcRoi.GetDescWithType()->getStrides()[0]; auto dst_strides = dst.GetDescWithType()->getStrides(); @@ -817,7 +820,7 @@ private: jit_roi_pooling_params jpp; }; -std::shared_ptr MKLDNNROIPoolingNode::ROIPoolingExecutor::createROIPoolingNewExecutor( +std::shared_ptr ROIPooling::ROIPoolingExecutor::createROIPoolingNewExecutor( const jit_roi_pooling_params& jpp) { ROIPoolingContext ctx = { nullptr, jpp }; @@ -828,7 +831,7 @@ std::shared_ptr MKLDNNROIPoolingNode:: return ctx.executor; } -std::tuple MKLDNNROIPoolingNode::ROIPoolingExecutor::getBordersForMaxMode( +std::tuple ROIPooling::ROIPoolingExecutor::getBordersForMaxMode( const int roi_start_h, const int roi_end_h, const int roi_start_w, const int roi_end_w, const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w) { int roi_height = std::max(roi_end_h - roi_start_h + 1, 1); @@ -861,7 +864,7 @@ std::tuple MKLDNNROIPoolingNode::ROIPoolingExecutor::getBord return std::make_tuple(hstart, hend, wstart, wend); } -std::pair MKLDNNROIPoolingNode::ROIPoolingExecutor::getXYForBilinearMode( +std::pair ROIPooling::ROIPoolingExecutor::getXYForBilinearMode( const float roi_start_h, const float roi_end_h, const float roi_start_w, const float roi_end_w, const int ih, const int oh, const int iw, const int ow, const int pooled_h, const int pooled_w) { float height_scale = (pooled_h > 1 ? 
((roi_end_h - roi_start_h) * (ih - 1)) / (pooled_h - 1) : 0); @@ -887,7 +890,7 @@ std::pair MKLDNNROIPoolingNode::ROIPoolingExecutor::getXYForBiline } template -std::shared_ptr MKLDNNROIPoolingNode::ROIPoolingExecutor::makeExecutor( +std::shared_ptr ROIPooling::ROIPoolingExecutor::makeExecutor( const jit_roi_pooling_params& jpp) { if (mayiuse(cpu::x64::sse41)) return std::make_shared>(jpp); @@ -895,8 +898,10 @@ std::shared_ptr MKLDNNROIPoolingNode:: return std::make_shared>(jpp); } -bool MKLDNNROIPoolingNode::created() const { - return getType() == ROIPooling; +bool ROIPooling::created() const { + return getType() == Type::ROIPooling; } -REG_MKLDNN_PRIM_FOR(MKLDNNROIPoolingNode, ROIPooling); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.h b/src/plugins/intel_cpu/src/nodes/roi_pooling.h index ebd36554840..bd21b044565 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.h @@ -13,6 +13,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct jit_roi_pooling_params { int mb, c; @@ -65,9 +66,9 @@ struct jit_uni_roi_pooling_kernel { jit_roi_pooling_params jpp_; }; -class MKLDNNROIPoolingNode : public MKLDNNNode { +class ROIPooling : public Node { public: - MKLDNNROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ROIPooling(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -92,9 +93,9 @@ private: public: ROIPoolingExecutor() = default; virtual void exec( - const ov::intel_cpu::MKLDNNMemory& srcData, - const ov::intel_cpu::MKLDNNMemory& srcRoi, - const ov::intel_cpu::MKLDNNMemory& dst) = 0; + const ov::intel_cpu::Memory& srcData, + const ov::intel_cpu::Memory& srcRoi, + const ov::intel_cpu::Memory& dst) = 0; virtual ~ROIPoolingExecutor() = default; static std::shared_ptr createROIPoolingNewExecutor(const jit_roi_pooling_params& jpp); @@ -131,5 +132,6 @@ private: executorPtr execPtr = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index 57da2c2aa30..47b836276fa 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "roll.h" #include "ie_parallel.hpp" @@ -16,10 +16,13 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNRollNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Roll::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; @@ -36,8 +39,8 @@ bool MKLDNNRollNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +Roll::Roll(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { layerErrorPrefix = "Roll layer with name '" + getName() + "'"; @@ -86,9 +89,9 @@ MKLDNNRollNode::MKLDNNRollNode(const std::shared_ptr& op, const mk } } -void 
MKLDNNRollNode::getSupportedDescriptors() {} +void Roll::getSupportedDescriptors() {} -void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { +void Roll::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -104,7 +107,7 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { } -void MKLDNNRollNode::execute(mkldnn::stream strm) { +void Roll::execute(mkldnn::stream strm) { const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().getDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); switch (dataTypeSize) { @@ -125,14 +128,14 @@ void MKLDNNRollNode::execute(mkldnn::stream strm) { } } -size_t MKLDNNRollNode::calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) { +size_t Roll::calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize) { size_t pos = dataOffset / segmentSize % dimSize; size_t shift = (pos + dimShift) % dimSize - pos; return dataOffset + shift * segmentSize; } template -void MKLDNNRollNode::rollImpl() { +void Roll::rollImpl() { const auto dataEdge = getParentEdgeAt(DATA_INDEX); const auto axesEdge = getParentEdgeAt(AXES_INDEX); const auto shiftsEdge = getParentEdgeAt(SHIFT_INDEX); @@ -183,12 +186,14 @@ void MKLDNNRollNode::rollImpl() { }); } -bool MKLDNNRollNode::created() const { - return getType() == Roll; +bool Roll::created() const { + return getType() == Type::Roll; } -void MKLDNNRollNode::createPrimitive() {} +void Roll::createPrimitive() {} -const std::vector MKLDNNRollNode::supportedPrecisionSizes = {1, 2, 4}; +const std::vector Roll::supportedPrecisionSizes = {1, 2, 4}; -REG_MKLDNN_PRIM_FOR(MKLDNNRollNode, Roll) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/roll.h b/src/plugins/intel_cpu/src/nodes/roll.h index 2c7742680ff..6ef9a0aba3b 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.h +++ b/src/plugins/intel_cpu/src/nodes/roll.h @@ -10,10 +10,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNRollNode : public MKLDNNNode { +class Roll : public Node { public: - MKLDNNRollNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Roll(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -40,5 +41,6 @@ private: const size_t numberOfInputs = 3ul; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp index d4b2de9e67f..9d16b456e7a 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include "ie_parallel.hpp" #include #include "common/cpu_memcpy.h" @@ -16,10 +16,13 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNScatterUpdateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool ScatterUpdate::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto scatterElemUpd = ngraph::as_type_ptr(op); auto scatterUpd = ngraph::as_type_ptr(op); @@ -35,12 +38,12 @@ bool MKLDNNScatterUpdateNode::isSupportedOperation(const 
std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache), dataSize(0lu), indicesSize(0lu), axisSize(0lu), dataPrec(Precision::UNSPECIFIED), indicesPrec(Precision::UNSPECIFIED), +ScatterUpdate::ScatterUpdate(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache), dataSize(0lu), indicesSize(0lu), axisSize(0lu), dataPrec(Precision::UNSPECIFIED), indicesPrec(Precision::UNSPECIFIED), axisPrec(Precision::UNSPECIFIED) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { @@ -50,7 +53,7 @@ MKLDNNScatterUpdateNode::MKLDNNScatterUpdateNode(const std::shared_ptr= 8) { indicesPrec = Precision::I64; indicesSize = 8; @@ -169,8 +172,8 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { if (axisRelaxed) { axisPrec = getOriginalInputPrecisionAtPort(AXIS_ID); - auto axisType = MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec); - axisSize = MKLDNNExtensionUtils::sizeOfDataType(axisType); + auto axisType = DnnlExtensionUtils::IEPrecisionToDataType(axisPrec); + axisSize = DnnlExtensionUtils::sizeOfDataType(axisType); if (axisSize >= 8) { axisPrec = Precision::I64; axisSize = 8; @@ -215,15 +218,15 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { impl_desc_type::unknown); } -bool MKLDNNScatterUpdateNode::needPrepareParams() const { +bool ScatterUpdate::needPrepareParams() const { return false; } -void MKLDNNScatterUpdateNode::executeDynamicImpl(mkldnn::stream strm) { +void ScatterUpdate::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -int64_t MKLDNNScatterUpdateNode::getIndicesValue(uint8_t *indices, size_t offset) { +int64_t ScatterUpdate::getIndicesValue(uint8_t *indices, size_t offset) { auto *indicesPtr = indices + offset * indicesSize; int64_t ret = 0; if (indicesSize == 4) { @@ -249,7 +252,7 @@ static std::vector getBlockND(const VectorDims& shape) { return blockND; } -void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { +void ScatterUpdate::execute(mkldnn::stream strm) { auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &indicesMemPtr = getParentEdgeAt(INDICES_ID)->getMemoryPtr(); @@ -363,7 +366,7 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { // For the data tensor of shape [d_0, d_1, ..., d_n], // and indices tensor of shape [i_0, i_1, ..., i_k]. // Updates tensor shape should be [d_0, d_1, ... d_(axis - 1), i_0, i_1, ..., i_k, d_(axis + 1), ..., d_n]. -void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { +void ScatterUpdate::scatterUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { const auto& srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); const auto& indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); const auto& updateDim = getParentEdgeAt(UPDATE_ID)->getMemory().getStaticDims(); @@ -396,7 +399,7 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i // indices is a (q-1)-dimension tensor of k-tuple, // k is indices.shape[-1] and should not be greater than rank of input, q is rank of indicies. 
// updates is a (q-1)-dimension tensor of replacement-slice-values -void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, uint8_t *dstData) { +void ScatterUpdate::scatterNDUpdate(uint8_t *indices, uint8_t *update, uint8_t *dstData) { const auto& srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); const auto& indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); size_t indicesRank = indicesDim.size(); @@ -426,7 +429,7 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, // output[indices[i][j][k]][j][k] = updates[i][j][k] if axis = 0, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, // output[i][j][indices[i][j][k]] = updates[i][j][k] if axis = 2. -void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { +void ScatterUpdate::scatterElementsUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { const auto& srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); const auto& updateDim = getParentEdgeAt(UPDATE_ID)->getMemory().getStaticDims(); size_t updateRank = updateDim.size(); @@ -473,10 +476,12 @@ void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *u }); } -bool MKLDNNScatterUpdateNode::created() const { - return getType() == ScatterUpdate || getType() == ScatterElementsUpdate || getType() == ScatterNDUpdate; +bool ScatterUpdate::created() const { + return getType() == Type::ScatterUpdate + || getType() == Type::ScatterElementsUpdate + || getType() == Type::ScatterNDUpdate; } -REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterUpdate); -REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterElementsUpdate); -REG_MKLDNN_PRIM_FOR(MKLDNNScatterUpdateNode, ScatterNDUpdate); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.h b/src/plugins/intel_cpu/src/nodes/scatter_update.h index c27a45bcf19..ecf0c38b57b 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.h +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.h @@ -12,6 +12,7 @@ namespace ov { namespace intel_cpu { +namespace node { enum class ScatterUpdateMode { ScatterUpdate, @@ -19,9 +20,9 @@ enum class ScatterUpdateMode { ScatterElementsUpdate }; -class MKLDNNScatterUpdateNode : public MKLDNNNode { +class ScatterUpdate : public Node { public: - MKLDNNScatterUpdateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ScatterUpdate(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -54,5 +55,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/select.cpp b/src/plugins/intel_cpu/src/nodes/select.cpp index 3a60cad01ff..2f66624cdb5 100644 --- a/src/plugins/intel_cpu/src/nodes/select.cpp +++ b/src/plugins/intel_cpu/src/nodes/select.cpp @@ -12,10 +12,13 @@ #include #include "common/cpu_memcpy.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNSelectNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Select::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto select = std::dynamic_pointer_cast(op); if 
(!select) { @@ -23,7 +26,7 @@ bool MKLDNNSelectNode::isSupportedOperation(const std::shared_ptrget_auto_broadcast(); - if (!ov::intel_cpu::one_of(broadcast.m_type, ngraph::op::AutoBroadcastType::NONE, ngraph::op::AutoBroadcastType::NUMPY)) { + if (!one_of(broadcast.m_type, ngraph::op::AutoBroadcastType::NONE, ngraph::op::AutoBroadcastType::NUMPY)) { errorMessage = "Does not support broadcast type: " + ngraph::as_string(broadcast.m_type); return false; } @@ -33,8 +36,8 @@ bool MKLDNNSelectNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +Select::Select(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -95,7 +98,7 @@ MKLDNNSelectNode::MKLDNNSelectNode(const std::shared_ptr& op, cons } } -void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() { +void Select::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -123,7 +126,7 @@ void MKLDNNSelectNode::initSupportedPrimitiveDescriptors() { impl_desc_type::ref_any); } -void MKLDNNSelectNode::prepareParams() { +void Select::prepareParams() { const auto &_conditionDims = getParentEdgesAtPort(CONDITION)[0]->getMemory().getStaticDims(); const auto &_thenDims = getParentEdgesAtPort(THEN)[0]->getMemory().getStaticDims(); const auto &_elseDims = getParentEdgesAtPort(ELSE)[0]->getMemory().getStaticDims(); @@ -152,7 +155,7 @@ void MKLDNNSelectNode::prepareParams() { } } -void MKLDNNSelectNode::calcOutOffset(VectorDims& offset, const VectorDims& dims) { +void Select::calcOutOffset(VectorDims& offset, const VectorDims& dims) { int k = 1; for (int i = dims.size() - 1; i >= 0; i--) { offset[i] = k; @@ -160,7 +163,7 @@ void MKLDNNSelectNode::calcOutOffset(VectorDims& offset, const VectorDims& dims) } } -void MKLDNNSelectNode::calcInOffset(VectorDims& offset, const VectorDims& inDims, const VectorDims& outDims) { +void Select::calcInOffset(VectorDims& offset, const VectorDims& inDims, const VectorDims& outDims) { int k = 1; for (int i = inDims.size() - 1; i >= 0; i--) { offset[i] = (inDims[i] == outDims[i]) ? 
k : 0; @@ -169,7 +172,7 @@ void MKLDNNSelectNode::calcInOffset(VectorDims& offset, const VectorDims& inDims } template -void MKLDNNSelectNode::execute_impl() { +void Select::execute_impl() { const auto *conditionData = reinterpret_cast(getParentEdgeAt(CONDITION)->getMemoryPtr()->GetPtr()); const auto *thenData = reinterpret_cast(getParentEdgeAt(THEN)->getMemoryPtr()->GetPtr()); const auto *elseData = reinterpret_cast(getParentEdgeAt(ELSE)->getMemoryPtr()->GetPtr()); @@ -193,11 +196,11 @@ void MKLDNNSelectNode::execute_impl() { } } -void MKLDNNSelectNode::executeDynamicImpl(mkldnn::stream strm) { +void Select::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNSelectNode::execute(mkldnn::stream strm) { +void Select::execute(mkldnn::stream strm) { const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().getDesc().getPrecision().size(); const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().size(); @@ -233,8 +236,10 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { } } -bool MKLDNNSelectNode::created() const { - return getType() == Select; +bool Select::created() const { + return getType() == Type::Select; } -REG_MKLDNN_PRIM_FOR(MKLDNNSelectNode, Select) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/select.h b/src/plugins/intel_cpu/src/nodes/select.h index 471d2a63a7c..62a7a69e0fd 100644 --- a/src/plugins/intel_cpu/src/nodes/select.h +++ b/src/plugins/intel_cpu/src/nodes/select.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNSelectNode : public MKLDNNNode { +class Select : public Node { public: - MKLDNNSelectNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Select(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -54,5 +55,6 @@ private: void execute_impl(); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.cpp b/src/plugins/intel_cpu/src/nodes/shapeof.cpp index acfdb9dde69..0fc6a9bdccf 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.cpp +++ b/src/plugins/intel_cpu/src/nodes/shapeof.cpp @@ -5,10 +5,13 @@ #include "shapeof.h" #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNShapeOfNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool ShapeOf::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ngraph::op::v0::ShapeOf::get_type_info_static(), @@ -22,8 +25,8 @@ bool MKLDNNShapeOfNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +ShapeOf::ShapeOf(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "ShapeOf layer with name '" + getName() + "' "; @@ -34,7 +37,7 @@ MKLDNNShapeOfNode::MKLDNNShapeOfNode(const std::shared_ptr& op, co } } -void MKLDNNShapeOfNode::getSupportedDescriptors() { +void ShapeOf::getSupportedDescriptors() { if (!descs.empty()) return; if (getParentEdges().size() != 1) @@ -43,7 +46,7 @@ void 
MKLDNNShapeOfNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); } -void MKLDNNShapeOfNode::initSupportedPrimitiveDescriptors() { +void ShapeOf::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -57,11 +60,11 @@ void MKLDNNShapeOfNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNShapeOfNode::isExecutable() const { +bool ShapeOf::isExecutable() const { return true; } -void MKLDNNShapeOfNode::execute(mkldnn::stream strm) { +void ShapeOf::execute(mkldnn::stream strm) { auto inPtr = getParentEdgeAt(0)->getMemoryPtr(); auto outPtr = getChildEdgeAt(0)->getMemoryPtr(); auto inDims = inPtr->getStaticDims(); @@ -76,8 +79,10 @@ void MKLDNNShapeOfNode::execute(mkldnn::stream strm) { } } -bool MKLDNNShapeOfNode::created() const { - return getType() == ShapeOf; +bool ShapeOf::created() const { + return getType() == Type::ShapeOf; } -REG_MKLDNN_PRIM_FOR(MKLDNNShapeOfNode, ShapeOf) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.h b/src/plugins/intel_cpu/src/nodes/shapeof.h index 1cc6631d02c..efdf649b9da 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.h +++ b/src/plugins/intel_cpu/src/nodes/shapeof.h @@ -8,14 +8,15 @@ #include #include #include -#include +#include namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNShapeOfNode : public MKLDNNNode { +class ShapeOf : public Node { public: - MKLDNNShapeOfNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ShapeOf(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -35,5 +36,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index cbed1326a70..87f280cd568 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -5,7 +5,7 @@ #include "shuffle_channels.h" #include -#include +#include #include #include "common/blocked_desc_creator.h" @@ -19,12 +19,15 @@ #define THROW_SHCH_ERROR IE_THROW() << "ShuffleChannels layer with name '" << getName() << "' " using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; -size_t MKLDNNShuffleChannelsNode::ShuffleChannelsAttributes::hash() const { +namespace ov { +namespace intel_cpu { +namespace node { + +size_t ShuffleChannels::ShuffleChannelsAttributes::hash() const { using namespace dnnl::impl; using namespace dnnl::impl::primitive_hashing; @@ -41,7 +44,7 @@ size_t MKLDNNShuffleChannelsNode::ShuffleChannelsAttributes::hash() const { return seed; } -bool MKLDNNShuffleChannelsNode::ShuffleChannelsAttributes::operator==(const ShuffleChannelsAttributes& rhs) const { +bool ShuffleChannels::ShuffleChannelsAttributes::operator==(const ShuffleChannelsAttributes& rhs) const { bool result = layoutType == rhs.layoutType && dataRank == rhs.dataRank && axis == rhs.axis && spatialRank == rhs.spatialRank && group == rhs.group && dataSize == rhs.dataSize && srcDims == rhs.srcDims && @@ -49,7 +52,7 @@ bool MKLDNNShuffleChannelsNode::ShuffleChannelsAttributes::operator==(const Shuf return result; } -bool 
MKLDNNShuffleChannelsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool ShuffleChannels::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { auto shuffleChannels = ov::as_type_ptr(op); if (!shuffleChannels) { @@ -62,8 +65,8 @@ bool MKLDNNShuffleChannelsNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +ShuffleChannels::ShuffleChannels(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -82,7 +85,7 @@ MKLDNNShuffleChannelsNode::MKLDNNShuffleChannelsNode(const std::shared_ptrgetMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -147,7 +150,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { } } -void MKLDNNShuffleChannelsNode::prepareParams() { +void ShuffleChannels::prepareParams() { auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto builder = [](const ShuffleChannelsAttributes& key) -> std::shared_ptr { return std::make_shared(key); @@ -164,11 +167,11 @@ void MKLDNNShuffleChannelsNode::prepareParams() { execPtr = result.first; } -MKLDNNShuffleChannelsNode::ShuffleChannelsExecutor::ShuffleChannelsExecutor(const ShuffleChannelsAttributes& attrs) { +ShuffleChannels::ShuffleChannelsExecutor::ShuffleChannelsExecutor(const ShuffleChannelsAttributes& attrs) { if (!one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c, LayoutType::nspc, LayoutType::ncsp)) IE_THROW() << "ShuffleChannels executor supports only 'nCsp16c', 'nCsp8c', 'nspc' or 'ncsp' layouts."; - const bool isBlocked = ov::intel_cpu::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); + const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); const bool isChannelsLast = attrs.layoutType == LayoutType::nspc; const auto& srcDims = attrs.srcDims; const auto& srcBlockedDims = attrs.srcBlockedDims; @@ -273,7 +276,7 @@ MKLDNNShuffleChannelsNode::ShuffleChannelsExecutor::ShuffleChannelsExecutor(cons permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -void MKLDNNShuffleChannelsNode::ShuffleChannelsExecutor::exec(const uint8_t* srcData, uint8_t* dstData, const int MB) { +void ShuffleChannels::ShuffleChannelsExecutor::exec(const uint8_t* srcData, uint8_t* dstData, const int MB) { if (!permuteKernel) IE_THROW() << "Could not execute. 
Kernel for Transpose node was not compiled."; @@ -283,11 +286,11 @@ void MKLDNNShuffleChannelsNode::ShuffleChannelsExecutor::exec(const uint8_t* src permuteKernel->execute(srcData, dstData); } -void MKLDNNShuffleChannelsNode::executeDynamicImpl(mkldnn::stream strm) { +void ShuffleChannels::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNShuffleChannelsNode::execute(mkldnn::stream strm) { +void ShuffleChannels::execute(mkldnn::stream strm) { if (!execPtr) THROW_SHCH_ERROR << "doesn't have a compiled executor."; @@ -300,8 +303,10 @@ void MKLDNNShuffleChannelsNode::execute(mkldnn::stream strm) { execPtr->exec(srcData, dstData, MB); } -bool MKLDNNShuffleChannelsNode::created() const { - return getType() == ShuffleChannels; +bool ShuffleChannels::created() const { + return getType() == Type::ShuffleChannels; } -REG_MKLDNN_PRIM_FOR(MKLDNNShuffleChannelsNode, ShuffleChannels); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.h b/src/plugins/intel_cpu/src/nodes/shuffle_channels.h index 82449b97be3..159d11fc4a0 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.h +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.h @@ -13,11 +13,12 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNShuffleChannelsNode : public MKLDNNNode { +class ShuffleChannels : public Node { public: - MKLDNNShuffleChannelsNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNShuffleChannelsNode() override = default; + ShuffleChannels(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~ShuffleChannels() override = default; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override {}; @@ -60,5 +61,6 @@ private: bool supportDynamicBatch = false; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp index d966ef15fd9..7c44773a75e 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp @@ -6,17 +6,20 @@ #include #include -#include +#include #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { + struct SoftmaxKey { DnnlMemoryDescCPtr inp0; impl_desc_type implType; @@ -47,9 +50,10 @@ bool SoftmaxKey::operator==(const SoftmaxKey& rhs) const { retVal = retVal && implType == rhs.implType && axis == rhs.axis; return retVal; } + } // namespace -bool MKLDNNSoftMaxNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool SoftMax::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!std::dynamic_pointer_cast(op)) { errorMessage = "Only opset1 Softmax operation is supported"; @@ -61,8 +65,8 @@ bool MKLDNNSoftMaxNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +SoftMax::SoftMax(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -70,14 +74,14 @@ 
MKLDNNSoftMaxNode::MKLDNNSoftMaxNode(const std::shared_ptr& op, co axis = ngraph::as_type_ptr(op)->get_axis(); } -void MKLDNNSoftMaxNode::getSupportedDescriptors() { +void SoftMax::getSupportedDescriptors() { if (descs.size()) return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(precision); if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); @@ -100,11 +104,11 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() { } } -bool MKLDNNSoftMaxNode::created() const { - return getType() == Softmax; +bool SoftMax::created() const { + return getType() == Type::Softmax; } -void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { +void SoftMax::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; @@ -124,18 +128,18 @@ void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { initDescriptor(config); } -void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, +void SoftMax::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { auto inpDesc = inputDesc[0]->isDefined() ? inputDesc[0] : MemoryDescUtils::makeDummyDesc(*inputDesc[0]); DnnlMemoryDescPtr definedInpMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inpDesc); auto in_candidate = definedInpMemDesc->getDnnlDesc(); - MKLDNNDescriptor desc(std::shared_ptr( + DnnlDesriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); descs.push_back(desc); } -void MKLDNNSoftMaxNode::prepareParams() { +void SoftMax::prepareParams() { auto inpDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); @@ -146,7 +150,7 @@ void MKLDNNSoftMaxNode::prepareParams() { auto engine = getEngine(); auto builder = [&engine](const SoftmaxKey& key) -> std::shared_ptr { softmax_forward::primitive_desc prim_desc; - MKLDNNDescriptor desc(std::shared_ptr( + DnnlDesriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, key.inp0->getDnnlDesc(), key.axis))); primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine); @@ -181,12 +185,14 @@ void MKLDNNSoftMaxNode::prepareParams() { primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}}; } -void MKLDNNSoftMaxNode::executeDynamicImpl(mkldnn::stream strm) { +void SoftMax::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -std::vector MKLDNNSoftMaxNode::shapeInfer() const { +std::vector SoftMax::shapeInfer() const { return {getParentEdgesAtPort(0).front()->getMemory().getStaticDims()}; } -REG_MKLDNN_PRIM_FOR(MKLDNNSoftMaxNode, Softmax); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/softmax.h b/src/plugins/intel_cpu/src/nodes/softmax.h index 9b7b624b111..5ed8f9abbc3 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.h +++ b/src/plugins/intel_cpu/src/nodes/softmax.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNSoftMaxNode : public MKLDNNNode { +class SoftMax : public Node { public: - MKLDNNSoftMaxNode(const std::shared_ptr& op, const 
mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + SoftMax(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void initOptimalPrimitiveDescriptor() override; void createDescriptor(const std::vector& inputDesc, @@ -33,5 +34,6 @@ private: size_t axis = 0; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 580eae8e85b..4a90d856932 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -13,10 +13,13 @@ #include #include -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNSpaceToBatchNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool SpaceToBatch::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto spaceToBatch = std::dynamic_pointer_cast(op); if (!spaceToBatch) { @@ -35,8 +38,8 @@ bool MKLDNNSpaceToBatchNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { +SpaceToBatch::SpaceToBatch(const std::shared_ptr& op, const mkldnn::engine& eng, + WeightsSharing::Ptr &cache) : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -57,7 +60,7 @@ MKLDNNSpaceToBatchNode::MKLDNNSpaceToBatchNode(const std::shared_ptr(op->get_input_node_shared_ptr(2))->cast_vector(); } -void MKLDNNSpaceToBatchNode::initSupportedPrimitiveDescriptors() { +void SpaceToBatch::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -97,8 +100,8 @@ void MKLDNNSpaceToBatchNode::initSupportedPrimitiveDescriptors() { } } -std::vector MKLDNNSpaceToBatchNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(1, 2, 3)); +std::vector SpaceToBatch::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(1, 2, 3)); } static std::vector getShape5D(const SizeVector &shape) { @@ -112,7 +115,7 @@ static std::vector getShape5D(const SizeVector &shape) { } template -void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { +void SpaceToBatch::SpaceToBatchKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); @@ -233,11 +236,11 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { }); } -void MKLDNNSpaceToBatchNode::executeDynamicImpl(mkldnn::stream strm) { +void SpaceToBatch::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { +void SpaceToBatch::execute(mkldnn::stream strm) { switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) { case 1: SpaceToBatchKernel::value_type>(); break; case 2: SpaceToBatchKernel::value_type>(); break; @@ -248,8 +251,10 @@ void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { } } -bool MKLDNNSpaceToBatchNode::created() const { - return getType() == SpaceToBatch; +bool SpaceToBatch::created() const { + return getType() == Type::SpaceToBatch; } -REG_MKLDNN_PRIM_FOR(MKLDNNSpaceToBatchNode, SpaceToBatch) +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.h 
b/src/plugins/intel_cpu/src/nodes/space_to_batch.h index 6dc9dc0c471..3087246242f 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.h +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.h @@ -12,10 +12,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNSpaceToBatchNode : public MKLDNNNode { +class SpaceToBatch : public Node { public: - MKLDNNSpaceToBatchNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + SpaceToBatch(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -38,5 +39,6 @@ private: void SpaceToBatchKernel(); }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index f23deece0cf..1c892dec0b2 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -4,7 +4,7 @@ #include "space_to_depth.h" -#include +#include #include #include @@ -17,12 +17,15 @@ #define THROW_ERROR IE_THROW() << "SpaceToDepth layer with name '" << getName() << "' " -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl; -size_t MKLDNNSpaceToDepthNode::SpaceToDepthAttrs::hash() const { +namespace ov { +namespace intel_cpu { +namespace node { + +size_t SpaceToDepth::SpaceToDepthAttrs::hash() const { using namespace dnnl::impl; using namespace dnnl::impl::primitive_hashing; @@ -39,7 +42,7 @@ size_t MKLDNNSpaceToDepthNode::SpaceToDepthAttrs::hash() const { return seed; } -bool MKLDNNSpaceToDepthNode::SpaceToDepthAttrs::operator==(const SpaceToDepthAttrs& rhs) const { +bool SpaceToDepth::SpaceToDepthAttrs::operator==(const SpaceToDepthAttrs& rhs) const { bool result = layoutType == rhs.layoutType && mode == rhs.mode && blockSize == rhs.blockSize && blockStep == rhs.blockStep && dataSize == rhs.dataSize && nSpatialDims == rhs.nSpatialDims && @@ -48,7 +51,7 @@ bool MKLDNNSpaceToDepthNode::SpaceToDepthAttrs::operator==(const SpaceToDepthAtt return result; } -bool MKLDNNSpaceToDepthNode::isSupportedOperation(const std::shared_ptr& op, +bool SpaceToDepth::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto spaceToDepth = ov::as_type_ptr(op); @@ -69,10 +72,10 @@ bool MKLDNNSpaceToDepthNode::isSupportedOperation(const std::shared_ptr& op, +SpaceToDepth::SpaceToDepth(const std::shared_ptr& op, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr& cache) - : MKLDNNNode(op, eng, cache) { + WeightsSharing::Ptr& cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -109,9 +112,9 @@ MKLDNNSpaceToDepthNode::MKLDNNSpaceToDepthNode(const std::shared_ptr(std::pow(attrs.blockSize, attrs.nSpatialDims)); } -void MKLDNNSpaceToDepthNode::getSupportedDescriptors() {} +void SpaceToDepth::getSupportedDescriptors() {} -void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { +void SpaceToDepth::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -163,7 +166,7 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { } } -void MKLDNNSpaceToDepthNode::createPrimitive() { +void SpaceToDepth::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr 
= getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -188,7 +191,7 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { } } -void MKLDNNSpaceToDepthNode::prepareParams() { +void SpaceToDepth::prepareParams() { attrs.srcBlockedDims = getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(); attrs.destBlockedDims = @@ -206,16 +209,16 @@ void MKLDNNSpaceToDepthNode::prepareParams() { execPtr = result.first; } -MKLDNNSpaceToDepthNode::SpaceToDepthExecutor::SpaceToDepthExecutor(const SpaceToDepthAttrs& attrs) { - if (!ov::intel_cpu::one_of(attrs.layoutType, - LayoutType::nCsp16c, - LayoutType::nCsp8c, - LayoutType::nspc, - LayoutType::ncsp)) +SpaceToDepth::SpaceToDepthExecutor::SpaceToDepthExecutor(const SpaceToDepthAttrs& attrs) { + if (!one_of(attrs.layoutType, + LayoutType::nCsp16c, + LayoutType::nCsp8c, + LayoutType::nspc, + LayoutType::ncsp)) IE_THROW() << "SpaceToDepth executor supports only 'nCsp16c', 'nCsp8c', " "'nspc' or 'ncsp' layouts."; - const bool isBlocked = ov::intel_cpu::one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); + const bool isBlocked = one_of(attrs.layoutType, LayoutType::nCsp16c, LayoutType::nCsp8c); const bool isChannelsFirst = attrs.layoutType == LayoutType::nspc; const auto& srcBlockedDims = attrs.srcBlockedDims; const auto& dstBlockedDims = attrs.destBlockedDims; @@ -303,13 +306,13 @@ MKLDNNSpaceToDepthNode::SpaceToDepthExecutor::SpaceToDepthExecutor(const SpaceTo permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -void MKLDNNSpaceToDepthNode::SpaceToDepthExecutor::exec(const uint8_t* srcData, uint8_t* dstData, const int MB) { +void SpaceToDepth::SpaceToDepthExecutor::exec(const uint8_t* srcData, uint8_t* dstData, const int MB) { if (!permuteKernel) IE_THROW() << "Could not execute. 
Kernel for Transpose node was not compiled."; permuteKernel->execute(srcData, dstData, MB); } -void MKLDNNSpaceToDepthNode::execute(mkldnn::stream strm) { +void SpaceToDepth::execute(mkldnn::stream strm) { if (!execPtr) { THROW_ERROR << "doesn't have a compiled executor."; } @@ -319,11 +322,14 @@ void MKLDNNSpaceToDepthNode::execute(mkldnn::stream strm) { execPtr->exec(srcData, dstData, MB); } -void MKLDNNSpaceToDepthNode::executeDynamicImpl(mkldnn::stream strm) { +void SpaceToDepth::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNSpaceToDepthNode::created() const { - return getType() == SpaceToDepth; +bool SpaceToDepth::created() const { + return getType() == Type::SpaceToDepth; } -REG_MKLDNN_PRIM_FOR(MKLDNNSpaceToDepthNode, SpaceToDepth); \ No newline at end of file + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.h b/src/plugins/intel_cpu/src/nodes/space_to_depth.h index 1849af39c19..55550c2e18b 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.h +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.h @@ -11,10 +11,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNSpaceToDepthNode : public MKLDNNNode { +class SpaceToDepth : public Node { public: - MKLDNNSpaceToDepthNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + SpaceToDepth(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -58,5 +59,6 @@ private: executorPtr execPtr = nullptr; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index 88f1b453ae2..5086a36cec0 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -7,7 +7,7 @@ #include "common/blocked_desc_creator.h" #include #include -#include +#include #include #include "utils/general_utils.h" #include @@ -16,12 +16,15 @@ #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; -bool MKLDNNSplitNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Split::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (!ov::intel_cpu::one_of(op->get_type_info(), ngraph::op::v1::Split::get_type_info_static(), ngraph::op::v1::VariadicSplit::get_type_info_static())) { + if (!one_of(op->get_type_info(), ngraph::op::v1::Split::get_type_info_static(), ngraph::op::v1::VariadicSplit::get_type_info_static())) { errorMessage = "Only opset1 Split and VariadicSplit operations are supported"; return false; } @@ -43,8 +46,8 @@ bool MKLDNNSplitNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +Split::Split(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -68,10 +71,10 @@ MKLDNNSplitNode::MKLDNNSplitNode(const std::shared_ptr& op, const this->axis = axis; } -void 
MKLDNNSplitNode::getSupportedDescriptors() { +void Split::getSupportedDescriptors() { } -void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { +void Split::initSupportedPrimitiveDescriptors() { constexpr size_t channelsPos = 1lu; if (!supportedPrimitiveDescriptors.empty()) @@ -234,18 +237,18 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNSplitNode::needPrepareParams() const { +bool Split::needPrepareParams() const { if (isOptimized()) { return false; } - return MKLDNNNode::inputShapesModified(); + return Node::inputShapesModified(); } -std::vector MKLDNNSplitNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(1, 2)); +std::vector Split::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(1, 2)); } -void MKLDNNSplitNode::prepareParams() { +void Split::prepareParams() { const auto &srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) { THROW_ERROR << "has not allocated input memory"; @@ -280,11 +283,11 @@ void MKLDNNSplitNode::prepareParams() { } } -bool MKLDNNSplitNode::isExecutable() const { +bool Split::isExecutable() const { return !isInputTensorAtPortEmpty(0) && !isOptimized(); } -void MKLDNNSplitNode::execute(mkldnn::stream strm) { +void Split::execute(mkldnn::stream strm) { if (isOptimized()) { return; } @@ -306,22 +309,22 @@ void MKLDNNSplitNode::execute(mkldnn::stream strm) { execPtr->exec(srcData, dstMemPtrs, batch, MB); } -bool MKLDNNSplitNode::created() const { - return getType() == Split; +bool Split::created() const { + return getType() == Type::Split; } -bool MKLDNNSplitNode::isOptimized() const { +bool Split::isOptimized() const { return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace() >= 0; } -void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { +void Split::initOptimalPrimitiveDescriptor() { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; auto config = selected_pd->getConfig(); if (!isOptimized()) { - MKLDNNNode::initOptimalPrimitiveDescriptor(); + Node::initOptimalPrimitiveDescriptor(); } else if (!isDynamicNode() && !isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { int num = getParentEdgeAt(i)->getInputNum(); @@ -381,7 +384,7 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { } } -void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { +void Split::selectOptimalPrimitiveDescriptor() { // Enforce the reference implementation for the planar layout if the implementation is in the impl priorities list. // This is needed mostly for the testing purposes, since for the planar layout Split works always in place, we need to enforce // the reference implementation when it is selected in a test to test that piece of code. 
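The hunks above and below repeat one mechanical pattern for every node file touched by this patch: the MKLDNN* class loses its prefix and "Node" suffix, the definition moves into the new ov::intel_cpu::node namespace, created() compares against the scoped Type:: enumerator instead of the old unscoped value, and the trailing REG_MKLDNN_PRIM_FOR registration macro is replaced by the namespace-closing braces. The following is a minimal, self-contained sketch of that pattern, not part of the patch: it uses a hypothetical node called "Foo" and stubs the Node base class and Type enum (the real ones live in node.h and cpu_types.h and take ngraph::Node, mkldnn::engine and WeightsSharing::Ptr arguments), so it illustrates only the shape of the change, not the plugin's actual API.

// Illustrative sketch only -- "Foo" is a made-up node name.
namespace ov {
namespace intel_cpu {

// Scoped enum: call sites now spell Type::Foo rather than a bare Foo, as seen in every created() above.
enum class Type { Foo /*, Split, Roll, ScatterUpdate, ... */ };

// Stub of the renamed base class (formerly MKLDNNNode).
class Node {
public:
    explicit Node(Type t) : type(t) {}
    virtual ~Node() = default;
    Type getType() const { return type; }
private:
    Type type;
};

namespace node {

// Before: class MKLDNNFooNode : public MKLDNNNode, registered with REG_MKLDNN_PRIM_FOR(MKLDNNFooNode, Foo).
// After:  a plain class name inside ov::intel_cpu::node, with no registration macro at the end of the .cpp.
class Foo : public Node {
public:
    Foo() : Node(Type::Foo) {}
    bool created() const { return getType() == Type::Foo; }
};

}  // namespace node
}  // namespace intel_cpu
}  // namespace ov

int main() {
    ov::intel_cpu::node::Foo foo;
    return foo.created() ? 0 : 1;  // exercises the scoped-enum comparison
}

Renames such as MKLDNNExtensionUtils -> DnnlExtensionUtils and MKLDNNMemoryPtr -> MemoryPtr in the surrounding hunks follow the same prefix-dropping convention and require no structural change at call sites.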
@@ -477,14 +480,14 @@ void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { selectPrimitiveDescriptorByIndex(0); } -void MKLDNNSplitNode::setDynamicBatchLim(int lim) { +void Split::setDynamicBatchLim(int lim) { if (axis == 0) THROW_ERROR << "Dynamic batch is not supported by split layer with axis == 0 parameter"; dynBatchLim = lim; } -void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { +void Split::optimizedNspc2Ncsp(size_t MB) { auto parentEdge = getParentEdgeAt(0); const int rank = parentEdge->getMemory().GetShape().getRank(); const auto parentDims = parentEdge->getMemory().getStaticDims(); @@ -530,7 +533,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { } } -MKLDNNSplitNode::SplitOptimizedExecutor::SplitOptimizedExecutor(BlockedMemoryDescCPtr inDesc, const std::vector &outDescs, +Split::SplitOptimizedExecutor::SplitOptimizedExecutor(BlockedMemoryDescCPtr inDesc, const std::vector &outDescs, const size_t axis) { // find axis order position const auto& order = inDesc->getOrder(); @@ -573,7 +576,7 @@ MKLDNNSplitNode::SplitOptimizedExecutor::SplitOptimizedExecutor(BlockedMemoryDes } } -void MKLDNNSplitNode::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vector> &dstMemPtrs, +void Split::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vector> &dstMemPtrs, const Dim origBatch, const Dim perInferBatch) { size_t execCountStrides = countStrides; if (origBatch != perInferBatch) @@ -588,4 +591,6 @@ void MKLDNNSplitNode::SplitOptimizedExecutor::exec(const uint8_t* srcData, const }); } -REG_MKLDNN_PRIM_FOR(MKLDNNSplitNode, Split); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h index abc5fd9b596..cb4f09bfd4c 100644 --- a/src/plugins/intel_cpu/src/nodes/split.h +++ b/src/plugins/intel_cpu/src/nodes/split.h @@ -10,10 +10,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNSplitNode : public MKLDNNNode { +class Split : public Node { public: - MKLDNNSplitNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Split(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -64,5 +65,6 @@ private: size_t INPUTS_NUM = 2; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp index 35ab631cda4..4c60521dced 100644 --- a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp @@ -14,10 +14,13 @@ #define THROW_ERROR IE_THROW() << NameFromType(getType()) << " node with name '" << getName() << "' " using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace InferenceEngine::details; +namespace ov { +namespace intel_cpu { +namespace node { + static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims& dims, VectorDims& indexes) { for (int j = nDims - 1; j >= 0; j--) { indexes[j] = start % dims[j]; @@ -26,7 +29,7 @@ static inline size_t parallel_init(size_t start, size_t nDims, const VectorDims& return start; } -bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool StridedSlice::isSupportedOperation(const std::shared_ptr& op, 
std::string& errorMessage) noexcept { try { if (!ov::is_type(op) && !ov::is_type(op)) { @@ -48,8 +51,8 @@ bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +StridedSlice::StridedSlice(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -142,7 +145,7 @@ MKLDNNStridedSliceNode::MKLDNNStridedSliceNode(const std::shared_ptr& } } -void MKLDNNStridedSliceNode::getSupportedDescriptors() { +void StridedSlice::getSupportedDescriptors() { const size_t inputRank = getInputShapeAtPort(DATA_ID).getRank(); const size_t outputRank = getOutputShapeAtPort(0).getRank(); const size_t nDims = std::max(inputRank, outputRank); @@ -165,9 +168,9 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { } auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { - const auto constNode = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent()); + const auto constNode = std::dynamic_pointer_cast(getParentEdgesAtPort(type)[0]->getParent()); if (!constNode) { - THROW_ERROR << "can't cast node on " << type << " port to MKLDNNInputNode"; + THROW_ERROR << "can't cast node on " << type << " port to Input"; } auto blob = constNode->getMemoryPtr(); if (blob->GetDataType() != mkldnn::memory::data_type::s32) @@ -211,7 +214,7 @@ void MKLDNNStridedSliceNode::getSupportedDescriptors() { } -void MKLDNNStridedSliceNode::addHiddenDims(const size_t nSrcDims, int ellipsisPos1) { +void StridedSlice::addHiddenDims(const size_t nSrcDims, int ellipsisPos1) { // all masks and input parameters are for planar layouts. 
So if we use blocked or per channel layout and // there is ellipsis should to add default values in hidden dimensions to know real order of mask or parameter values size_t afterDims = attrs.ellipsisMask.size() - ellipsisPos1 - 1; @@ -238,7 +241,7 @@ void MKLDNNStridedSliceNode::addHiddenDims(const size_t nSrcDims, int ellipsisPo addHiddenDims(attrs.shrinkAxisMask); } -void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { +void StridedSlice::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -302,11 +305,11 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNStridedSliceNode::isExecutable() const { +bool StridedSlice::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -void MKLDNNStridedSliceNode::createPrimitive() { +void StridedSlice::createPrimitive() { if (!isExecutable()) { return; } @@ -328,7 +331,7 @@ void MKLDNNStridedSliceNode::createPrimitive() { } } -void MKLDNNStridedSliceNode::orderParametersByLayouts(const MKLDNNMemoryPtr& srcMemPtr) { +void StridedSlice::orderParametersByLayouts(const MemoryPtr& srcMemPtr) { size_t blk = 1; bool isBlockedLayout = false; if (srcMemPtr->getDesc().hasLayoutType(LayoutType::nCsp16c)) { @@ -373,13 +376,13 @@ void MKLDNNStridedSliceNode::orderParametersByLayouts(const MKLDNNMemoryPtr& src } } -void MKLDNNStridedSliceNode::prepareParams() { +void StridedSlice::prepareParams() { execPtr = std::make_shared(attrs, getParentEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims(), getChildEdgeAt(0)->getMemoryPtr()->GetDescWithType()->getBlockDims()); } -MKLDNNStridedSliceNode::StridedSliceExecutor::StridedSliceExecutor(const StridedSliceAttributes& attrs, +StridedSlice::StridedSliceExecutor::StridedSliceExecutor(const StridedSliceAttributes& attrs, const VectorDims& srcBlockedDims, const VectorDims& dstBlockedDims) { StridedSliceParams params; @@ -393,7 +396,7 @@ MKLDNNStridedSliceNode::StridedSliceExecutor::StridedSliceExecutor(const Strided indicesCalculation(params); } -void MKLDNNStridedSliceNode::StridedSliceExecutor::dimsNormalization(StridedSliceParams& params) { +void StridedSlice::StridedSliceExecutor::dimsNormalization(StridedSliceParams& params) { // creating new src and dst dimensions and parameters of the same size using masks // // example 1: before srcDims = [5, 6, 8, 3, 2], begin = [1, 0], end = [4, 0], stride = [1, 1] @@ -502,7 +505,7 @@ void MKLDNNStridedSliceNode::StridedSliceExecutor::dimsNormalization(StridedSlic } } -void MKLDNNStridedSliceNode::StridedSliceExecutor::dimsGluing(StridedSliceParams& params, const size_t realNDims) { +void StridedSlice::StridedSliceExecutor::dimsGluing(StridedSliceParams& params, const size_t realNDims) { // gluing of dimensions if there aren't begin, end and stride != 1 on this axis // example: before gluing srcDims = [5, 6, 8, 3, 2], begin = [1, 0, 0, 0, 0], stride = [1, 1, 2, 1, 1], dstDims = [4, 6, 4, 3, 2] // after gluing srcDims = [30, 8, 6], begin = [6, 0, 0], stride = [1, 2, 1], dstDims = [24, 4, 6] @@ -596,7 +599,7 @@ void MKLDNNStridedSliceNode::StridedSliceExecutor::dimsGluing(StridedSliceParams } } -void MKLDNNStridedSliceNode::StridedSliceExecutor::indicesCalculation(const StridedSliceParams& params) { +void StridedSlice::StridedSliceExecutor::indicesCalculation(const StridedSliceParams& params) { // indices calculation before execution for the best performance srcIndices.resize(workAmount, 0); dstIndices.resize(workAmount, 0); @@ -646,7 +649,7 @@ void 
MKLDNNStridedSliceNode::StridedSliceExecutor::indicesCalculation(const Stri }); } -void MKLDNNStridedSliceNode::StridedSliceExecutor::indicesCalculationForOptimized(const StridedSliceParams& params) { +void StridedSlice::StridedSliceExecutor::indicesCalculationForOptimized(const StridedSliceParams& params) { const size_t dstIdx0 = params.dstStrides[0] * params.attrs.dataSize; const size_t dstIdx1 = params.dstStrides[1] * params.attrs.dataSize; const size_t srcIdx0 = params.attrs.stride[0] * params.srcStrides[0] * params.attrs.dataSize; @@ -665,7 +668,7 @@ void MKLDNNStridedSliceNode::StridedSliceExecutor::indicesCalculationForOptimize } } -void MKLDNNStridedSliceNode::StridedSliceExecutor::exec(const uint8_t* srcData, uint8_t* dstData) { +void StridedSlice::StridedSliceExecutor::exec(const uint8_t* srcData, uint8_t* dstData) { const uint8_t* srcShiftedData = srcData + srcShift; parallel_nt(nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -676,7 +679,7 @@ void MKLDNNStridedSliceNode::StridedSliceExecutor::exec(const uint8_t* srcData, }); } -void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { +void StridedSlice::execute(mkldnn::stream strm) { if (!execPtr) THROW_ERROR << "doesn't have compiled executor!"; const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemory().GetPtr()); @@ -684,12 +687,14 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { execPtr->exec(srcData, dstData); } -void MKLDNNStridedSliceNode::executeDynamicImpl(mkldnn::stream strm) { +void StridedSlice::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -bool MKLDNNStridedSliceNode::created() const { - return getType() == StridedSlice; +bool StridedSlice::created() const { + return getType() == Type::StridedSlice; } -REG_MKLDNN_PRIM_FOR(MKLDNNStridedSliceNode, StridedSlice); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.h b/src/plugins/intel_cpu/src/nodes/strided_slice.h index ea4d4815f16..0cdc0d0187b 100644 --- a/src/plugins/intel_cpu/src/nodes/strided_slice.h +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.h @@ -10,10 +10,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNStridedSliceNode : public MKLDNNNode { +class StridedSlice : public Node { public: - MKLDNNStridedSliceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + StridedSlice(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -33,7 +34,7 @@ protected: private: void addHiddenDims(const size_t nSrcDims, int ellipsisPos1); - void orderParametersByLayouts(const MKLDNNMemoryPtr& srcMemPtr); + void orderParametersByLayouts(const MemoryPtr& srcMemPtr); struct StridedSliceAttributes { std::vector begin; @@ -100,5 +101,6 @@ private: bool isConstantInput[AXES_ID + 1] = {false}; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index b05dcd18ef7..ffed92b3511 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -24,15 +24,18 @@ #include #include "emitters/cpu_generator.hpp" -using namespace ov::intel_cpu; using namespace 
InferenceEngine; using namespace mkldnn::impl::utils; using namespace mkldnn::impl::cpu; using namespace mkldnn::impl::cpu::x64; using namespace Xbyak; -MKLDNNSnippetNode::MKLDNNSnippetNode(const std::shared_ptr& op, const dnnl::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +namespace ov { +namespace intel_cpu { +namespace node { + +Snippet::Snippet(const std::shared_ptr& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_common) ? dnnl::impl::cpu::x64::avx512_common : dnnl::impl::cpu::x64::avx2; @@ -54,7 +57,7 @@ MKLDNNSnippetNode::MKLDNNSnippetNode(const std::shared_ptr& op, co } } -void MKLDNNSnippetNode::initSupportedPrimitiveDescriptors() { +void Snippet::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -162,11 +165,11 @@ void MKLDNNSnippetNode::initSupportedPrimitiveDescriptors() { supportedPrimitiveDescriptors.emplace_back(initDesc(Planar)); } -void MKLDNNSnippetNode::selectOptimalPrimitiveDescriptor() { +void Snippet::selectOptimalPrimitiveDescriptor() { selectPreferPrimitiveDescriptor(getPrimitivesPriority(), true); } -void MKLDNNSnippetNode::createPrimitive() { +void Snippet::createPrimitive() { // schedule definition part // it defines offsets, strides and sizes for snippet kernel scheduling define_schedule(); @@ -179,9 +182,9 @@ void MKLDNNSnippetNode::createPrimitive() { generate(); } -void MKLDNNSnippetNode::execute(dnnl::stream strm) { +void Snippet::execute(dnnl::stream strm) { if (schedule.ptr == nullptr || !canUseOptimizedImpl) { - IE_THROW() << "MKLDNNSnippetNode can't use Optimized implementation and can't fallback to reference"; + IE_THROW() << "Snippet can't use Optimized implementation and can't fallback to reference"; } jit_snippets_call_args call_args; for (size_t i = 0; i < srcMemPtrs.size(); i++) @@ -197,12 +200,12 @@ void MKLDNNSnippetNode::execute(dnnl::stream strm) { } } -bool MKLDNNSnippetNode::created() const { - return getType() == Subgraph; +bool Snippet::created() const { + return getType() == Type::Subgraph; } -bool MKLDNNSnippetNode::canBeInPlace() const { - if (getParentEdgesAtPort(0)[0]->getParent()->getType() == Input) { +bool Snippet::canBeInPlace() const { + if (getParentEdgesAtPort(0)[0]->getParent()->getType() == Type::Input) { return false; } @@ -212,7 +215,7 @@ bool MKLDNNSnippetNode::canBeInPlace() const { return false; // WA to prevent memory corruption caused by inplace feature - if (parent->getType() == Concatenation) { + if (parent->getType() == Type::Concatenation) { for (auto& parentParentEdge : parent->getParentEdges()) { auto parentParent = parentParentEdge.lock()->getParent(); if (parentParent->getChildEdges().size() != 1) @@ -247,8 +250,8 @@ static auto collapseLastDims(std::vector& dims, size_t dimsToCollapse) - } } -void MKLDNNSnippetNode::define_schedule() { - auto edgeToBlockedShape = [](const MKLDNNEdgePtr& edge) { +void Snippet::define_schedule() { + auto edgeToBlockedShape = [](const EdgePtr& edge) { const auto blockedDesc = edge->getMemory().GetDescWithType(); ngraph::Shape shape(blockedDesc->getBlockDims()); ngraph::AxisVector blocking(blockedDesc->getOrder()); @@ -413,7 +416,7 @@ void MKLDNNSnippetNode::define_schedule() { initSchedulingInfo(); } -void MKLDNNSnippetNode::generate() { +void Snippet::generate() { jit_snippets_compile_args jcp; jcp.output_dims = exec_domain; std::copy(sch_dims.begin(), sch_dims.end(), jcp.scheduler_dims); @@ -435,7 
+438,7 @@ void MKLDNNSnippetNode::generate() { schedule = snippet->generate(reinterpret_cast(&jcp)); } -void MKLDNNSnippetNode::schedule_6d(const jit_snippets_call_args& call_args) const { +void Snippet::schedule_6d(const jit_snippets_call_args& call_args) const { const auto& dom = exec_domain; // < N, C, H, W > < 1, 1, N, C*H*W> parallel_for5d(dom[0], dom[1], dom[2], dom[3], dom[4], @@ -445,7 +448,7 @@ void MKLDNNSnippetNode::schedule_6d(const jit_snippets_call_args& call_args) con }); } -void MKLDNNSnippetNode::schedule_nt(const jit_snippets_call_args& call_args) const { +void Snippet::schedule_nt(const jit_snippets_call_args& call_args) const { const auto& work_size = exec_domain; parallel_nt(0, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -464,5 +467,6 @@ void MKLDNNSnippetNode::schedule_nt(const jit_snippets_call_args& call_args) con }); } -REG_MKLDNN_PRIM_FOR(MKLDNNSnippetNode, Subgraph); - +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.h b/src/plugins/intel_cpu/src/nodes/subgraph.h index 03fea84008d..b2a2cc20fc0 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.h +++ b/src/plugins/intel_cpu/src/nodes/subgraph.h @@ -17,14 +17,15 @@ namespace ov { namespace intel_cpu { +namespace node { -/// MKLDNNSnippetNode represents subgraph node in MKLDNN plugin +/// Snippet represents subgraph node in CPU plugin /// potentially, snippet can be placed as a postop to any support operation while it doesn't support postops itself /// precision: fp32 -class MKLDNNSnippetNode : public MKLDNNNode { +class Snippet : public Node { public: - MKLDNNSnippetNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNSnippetNode() override = default; + Snippet(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~Snippet() override = default; void getSupportedDescriptors() override {}; void initSupportedPrimitiveDescriptors() override; @@ -73,8 +74,8 @@ private: size_t schedulerWorkAmount = 0; const size_t maxTileRank = 2; - std::vector srcMemPtrs = {}; - std::vector dstMemPtrs = {}; + std::vector srcMemPtrs = {}; + std::vector dstMemPtrs = {}; std::vector> dims_in = {}; std::vector> offsets_in = {}; @@ -90,5 +91,6 @@ private: bool canUseOptimizedImpl = true; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 05ee35a4e6b..f02f82524be 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include "common/blocked_desc_creator.h" @@ -15,12 +15,12 @@ #include "common/cpu_memcpy.h" using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace InferenceEngine::details; namespace ov { namespace intel_cpu { +namespace node { #define THROW_ERROR IE_THROW() << getTypeStr() << " layer with name '" << getName() << "' " @@ -53,7 +53,7 @@ static NodeConfig make_plain_config(const std::shared_ptr& op) { return config; } -static void redefineToMemories(const std::vector& to_mems, MemoryDescPtr new_desc) { +static void redefineToMemories(const std::vector& to_mems, MemoryDescPtr new_desc) { const auto &currDesc = to_mems.front()->getDesc(); if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != 
new_desc->getShape().getStaticDims()) { // TODO : check the entire dstMemPtrs usage considering the proper memory sharing @@ -64,8 +64,8 @@ static void redefineToMemories(const std::vector& to_mems, Memo } // this method get all memory ptrs of childs of one port to redefine descs for them -static std::vector getToMemories(const MKLDNNNode* node, const size_t port) { - std::vector memories; +static std::vector getToMemories(const Node* node, const size_t port) { + std::vector memories; for (auto& edge : node->getChildEdgesAtPort(port)) memories.push_back(edge->getMemoryPtr()); return memories; @@ -79,7 +79,7 @@ static void nullifyUndefinedDims(VectorDims& dims) { class PortIteratorHelper : public PortMapHelper { public: - PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src, + PortIteratorHelper(const MemoryPtr &from, const MemoryPtr &to, bool sliced_src, const PortMap &slice_rule, const mkldnn::engine& eng) : sliced_src(sliced_src) { const auto &full_blob = sliced_src ? from : to; @@ -108,7 +108,7 @@ public: const auto full_mem_handler = full_mem.get_data_handle(); mkldnn::memory chunk_mem = {chunk_desc, eng, full_mem_handler}; - auto elem_size = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(chunk_desc.data.data_type)); + auto elem_size = DnnlExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(chunk_desc.data.data_type)); chunk_stride_in_byte = chunk_desc.data.format_desc.blocking.strides[axis] * elem_size * abs_stride; chunk_offset_in_byte = sign_of_stride < 0 ? (iter_count - 1) * chunk_stride_in_byte : 0; @@ -146,7 +146,7 @@ private: class BackEdgePortHelper : public PortMapHelper { public: - BackEdgePortHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) { + BackEdgePortHelper(const MemoryPtr &from, const MemoryPtr &to, const mkldnn::engine& eng) { mem_holder_src = from->GetPrimitive(); mem_holder_dst = to->GetPrimitive(); reorder = {mem_holder_src, mem_holder_dst}; @@ -161,7 +161,7 @@ public: class IterCountPortHelper : public PortMapHelper { public: - IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) { + IterCountPortHelper(const MemoryPtr &to, const mkldnn::engine& eng) { // Only scalar I32 tensor is supported IE_ASSERT(to->GetDataType() == memory::data_type::s32); IE_ASSERT(to->GetShape() == Shape(VectorDims{1})); @@ -180,7 +180,7 @@ public: class asBoolCheck : public PortChecker { public: - asBoolCheck(const MKLDNNMemoryPtr &mem) { + asBoolCheck(const MemoryPtr &mem) { IE_ASSERT(mem->GetDataType() == memory::data_type::u8); IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1})); mem_holder = mem->GetPrimitive(); @@ -197,7 +197,7 @@ public: class asIntCheck : public PortChecker { public: - asIntCheck(const MKLDNNMemoryPtr &mem) { + asIntCheck(const MemoryPtr &mem) { IE_ASSERT(mem->GetDataType() == memory::data_type::s32); IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1})); mem_holder = mem->GetPrimitive(); @@ -223,9 +223,9 @@ private: int value; }; -DynamicBuffer::DynamicBuffer(const MKLDNNMemoryPtr &from_, const std::vector &to_, +DynamicBuffer::DynamicBuffer(const MemoryPtr &from_, const std::vector &to_, const PortMap &map_rule_) : from(from_), to(to_), map_rule(map_rule_) { - elem_size = MKLDNNExtensionUtils::sizeOfDataType(from->GetDataType()); + elem_size = DnnlExtensionUtils::sizeOfDataType(from->GetDataType()); } void DynamicBuffer::execute(const mkldnn::engine& eng, const int iter) { @@ -274,7 +274,7 @@ std::shared_ptr 
DynamicBuffer::create_buffer(const mkldnn::engin " is expected, but actual: " << from->getStaticDims()[axis]; dims[axis] += abs_stride; - mkldnn::memory::desc new_buffer_desc(dims, old_desc.data_type(), MKLDNNExtensionUtils::GetPlainFormatByRank(dims.size())); + mkldnn::memory::desc new_buffer_desc(dims, old_desc.data_type(), DnnlExtensionUtils::GetPlainFormatByRank(dims.size())); if (stride > 0.0f) { chunk_offset_in_byte += new_buffer_desc.data.format_desc.blocking.strides[axis] * elem_size * abs_stride; @@ -304,10 +304,10 @@ void DynamicBuffer::move_data() { src_stride, dst_stride, count, src_stride); } -void DynamicBuffer::transfer(const MKLDNNNode* node) { +void DynamicBuffer::transfer(const Node* node) { if (mem_holder_buffer) { const auto desc = node->getBaseMemDescAtOutputPort(map_rule.from)->cloneWithNewDims( - MKLDNNExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims())); + DnnlExtensionUtils::convertToVectorDims(mem_holder_buffer->get_desc().dims())); redefineToMemories(to, desc); copy(get_ptr(*mem_holder_buffer.get()), reinterpret_cast(to.front()->GetPtr()), 0, 0, 1, to.front()->GetSize()); @@ -335,10 +335,7 @@ uint8_t* DynamicBuffer::get_ptr(mkldnn::memory& prim) { return ptr + wrapper.offset0() * wrapper.data_type_size(); } -} // namespace intel_cpu -} // namespace ov - -bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool TensorIterator::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v0::TensorIterator::get_type_info_static(), @@ -352,15 +349,15 @@ bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache), ngraphOp(op) { +TensorIterator::TensorIterator(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache), ngraphOp(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } } -void MKLDNNTensorIteratorNode::getSupportedDescriptors() { +void TensorIterator::getSupportedDescriptors() { auto tiOp = ov::as_type_ptr(ngraphOp); if (!tiOp) { THROW_ERROR << "cannot be cast to ov::op::util::SubGraphOp"; @@ -439,7 +436,7 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() { } if (auto loopOp = ov::as_type_ptr(ngraphOp)) { - algorithm = TensorIteratorLoop; + algorithm = Algorithm::TensorIteratorLoop; auto spec_port = loopOp->get_special_body_ports(); if (spec_port.current_iteration_input_idx != -1) { loopBodyCurrentIterationIdx.push_back(spec_port.current_iteration_input_idx); @@ -450,20 +447,20 @@ void MKLDNNTensorIteratorNode::getSupportedDescriptors() { loopTripCountIdx = 0; loopExecutionConditionIdx = 1; } else if (auto ti = ov::as_type_ptr(ngraphOp)) { - algorithm = TensorIteratorCommon; + algorithm = Algorithm::TensorIteratorCommon; } else { THROW_ERROR << "isn't supported!"; } } -void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() { +void TensorIterator::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; supportedPrimitiveDescriptors.emplace_back(make_plain_config(ngraphOp), impl_desc_type::unknown); } -void MKLDNNTensorIteratorNode::createPrimitive() { +void TensorIterator::createPrimitive() { if (loopBodyConditionOutputIdx == -1) continue_cond_check.reset(new staticValueCheck(true)); // always true if 
(loopExecutionConditionIdx == -1) { @@ -474,21 +471,21 @@ void MKLDNNTensorIteratorNode::createPrimitive() { if (isDynamicNode()) prepareDynamicBuffers(); - MKLDNNNode::createPrimitive(); + Node::createPrimitive(); } -bool MKLDNNTensorIteratorNode::needPrepareParams() const { - if (getAlgorithm() == TensorIteratorLoop) { +bool TensorIterator::needPrepareParams() const { + if (getAlgorithm() == Algorithm::TensorIteratorLoop) { const auto tripCountPtr = reinterpret_cast(getParentEdgesAtPort(loopTripCountIdx).front()->getMemoryPtr()->GetPtr()); const auto condPtr = reinterpret_cast(getParentEdgesAtPort(loopExecutionConditionIdx).front()->getMemoryPtr()->GetPtr()); if (tripCountPtr[0] != lastUsedTripCount || condPtr[0] != lastUsedCond) return true; } - return MKLDNNNode::needPrepareParams(); + return Node::needPrepareParams(); } -void MKLDNNTensorIteratorNode::prepareParams() { +void TensorIterator::prepareParams() { prepareTripCount(); prepareInitialCond(); @@ -510,7 +507,7 @@ void MKLDNNTensorIteratorNode::prepareParams() { } } -void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) { +void TensorIterator::execute(mkldnn::stream strm) { sub_graph.ResetInferCount(); bool continue_cond = initial_cond_check->getStatus(); @@ -539,7 +536,7 @@ void MKLDNNTensorIteratorNode::execute(mkldnn::stream strm) { mapper->execute(strm); } -void MKLDNNTensorIteratorNode::executeDynamicImpl(mkldnn::stream strm) { +void TensorIterator::executeDynamicImpl(mkldnn::stream strm) { const auto &eng = getEngine(); sub_graph.ResetInferCount(); @@ -574,7 +571,7 @@ void MKLDNNTensorIteratorNode::executeDynamicImpl(mkldnn::stream strm) { /* *==============* Prepare reorders, edges between body and TI *==============* */ -void MKLDNNTensorIteratorNode::prepareInputPorts() { +void TensorIterator::prepareInputPorts() { const auto &eng = getEngine(); for (auto map_rule : inputPortMap) { auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); @@ -588,7 +585,7 @@ void MKLDNNTensorIteratorNode::prepareInputPorts() { } } -void MKLDNNTensorIteratorNode::prepareOutputPorts() { +void TensorIterator::prepareOutputPorts() { const auto &eng = getEngine(); for (auto map_rule : outputPortMap) { auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); @@ -601,7 +598,7 @@ void MKLDNNTensorIteratorNode::prepareOutputPorts() { } } -void MKLDNNTensorIteratorNode::prepareBackEdges() { +void TensorIterator::prepareBackEdges() { const auto &eng = getEngine(); for (auto map_rule : backEdges) { auto from_mem = output_mem[map_rule.from]; @@ -611,7 +608,7 @@ void MKLDNNTensorIteratorNode::prepareBackEdges() { } } -void MKLDNNTensorIteratorNode::prepareDynamicBackEdges() { +void TensorIterator::prepareDynamicBackEdges() { const auto &eng = getEngine(); back_mappers.clear(); for (auto map_rule : backEdges) { @@ -625,7 +622,7 @@ void MKLDNNTensorIteratorNode::prepareDynamicBackEdges() { } } -void MKLDNNTensorIteratorNode::prepareDynamicBuffers() { +void TensorIterator::prepareDynamicBuffers() { for (auto map_rule : outputPortMap) { if (map_rule.axis != -1) { auto to_mems = getToMemories(this, map_rule.from); @@ -635,7 +632,7 @@ void MKLDNNTensorIteratorNode::prepareDynamicBuffers() { } } -void MKLDNNTensorIteratorNode::prepareLoopBodyCurrentIteration() { +void TensorIterator::prepareLoopBodyCurrentIteration() { const auto &eng = getEngine(); for (auto idx : loopBodyCurrentIterationIdx) { auto to_mem = input_mems[idx].front(); // first memory is enough to get common memory ptr @@ -643,14 +640,14 @@ void 
MKLDNNTensorIteratorNode::prepareLoopBodyCurrentIteration() { } } -void MKLDNNTensorIteratorNode::prepareContinueCond() { +void TensorIterator::prepareContinueCond() { if (loopBodyConditionOutputIdx != -1 || !continue_cond_check) { auto mem = output_mem[loopBodyConditionOutputIdx]; continue_cond_check.reset(new asBoolCheck(mem)); } } -void MKLDNNTensorIteratorNode::prepareInitialCond() { +void TensorIterator::prepareInitialCond() { if (loopExecutionConditionIdx != -1 || !initial_cond_check) { auto mem = getParentEdgesAtPort(loopExecutionConditionIdx)[0]->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); @@ -658,7 +655,7 @@ void MKLDNNTensorIteratorNode::prepareInitialCond() { } } -void MKLDNNTensorIteratorNode::prepareTripCount() { +void TensorIterator::prepareTripCount() { if (loopTripCountIdx == -1) { trip_count_check.reset(new staticValueCheck(getNumIteration(inputPortMap, outputPortMap))); } else { @@ -670,7 +667,7 @@ void MKLDNNTensorIteratorNode::prepareTripCount() { /* *==============* *==============* *==============* *==============* *==============* */ -void MKLDNNTensorIteratorNode::reshapeSubgraphInput() { +void TensorIterator::reshapeSubgraphInput() { for (auto map_rule : inputPortMap) { auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mems = input_mems[map_rule.to]; @@ -683,7 +680,7 @@ void MKLDNNTensorIteratorNode::reshapeSubgraphInput() { } } -void MKLDNNTensorIteratorNode::reshapeAndFillOutput(mkldnn::stream strm) { +void TensorIterator::reshapeAndFillOutput(mkldnn::stream strm) { auto eng = strm.get_engine(); for (auto map_rule : outputPortMap) { if (map_rule.axis == -1) { @@ -710,7 +707,7 @@ void MKLDNNTensorIteratorNode::reshapeAndFillOutput(mkldnn::stream strm) { } } -int MKLDNNTensorIteratorNode::getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const { +int TensorIterator::getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const { const auto isIterable = [](const PortMap& rule) { return rule.axis != -1; }; @@ -795,7 +792,10 @@ int MKLDNNTensorIteratorNode::getNumIteration(const std::vector& inputP return numIterations; } -bool MKLDNNTensorIteratorNode::created() const { - return getType() == TensorIterator; +bool TensorIterator::created() const { + return getType() == Type::TensorIterator; } -REG_MKLDNN_PRIM_FOR(MKLDNNTensorIteratorNode, TensorIterator); + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h index b9850b6df3a..64379c650df 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.h +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h @@ -13,6 +13,7 @@ namespace ov { namespace intel_cpu { +namespace node { struct PortMap { // Data map rule @@ -63,11 +64,11 @@ protected: */ class DynamicBuffer { public: - DynamicBuffer(const MKLDNNMemoryPtr &from_, const std::vector &to_, const PortMap &map_rule_); + DynamicBuffer(const MemoryPtr &from_, const std::vector &to_, const PortMap &map_rule_); ~DynamicBuffer() = default; void execute(const mkldnn::engine& eng, const int iter); - void transfer(const MKLDNNNode* node); + void transfer(const Node* node); private: void init(const mkldnn::engine& eng); @@ -86,16 +87,16 @@ private: ptrdiff_t chunk_offset_in_byte = 0; ptrdiff_t buffer_offset_in_byte = 0; - MKLDNNMemoryPtr from; - std::vector to; + MemoryPtr from; + std::vector to; PortMap map_rule; std::shared_ptr 
mem_holder_buffer; }; -class MKLDNNTensorIteratorNode : public MKLDNNNode { +class TensorIterator : public Node { public: - MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + TensorIterator(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void initSupportedPrimitiveDescriptors() override; @@ -105,7 +106,7 @@ public: void execute(mkldnn::stream strm) override; bool isExecutable() const override { return true; } - void setExtManager(const MKLDNNExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } + void setExtManager(const ExtensionManager::Ptr& extMgr) { ext_mng = extMgr; } protected: // needShapeInfer() should return false @@ -132,10 +133,10 @@ private: void reshapeAndFillOutput(mkldnn::stream strm); int getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const; - MKLDNNExtensionManager::Ptr ext_mng; - MKLDNNGraph sub_graph; - std::vector> input_mems; - std::vector output_mem; + ExtensionManager::Ptr ext_mng; + Graph sub_graph; + std::vector> input_mems; + std::vector output_mem; std::vector> first_mappers, /// < Applied once before loop @@ -166,5 +167,6 @@ private: const std::shared_ptr ngraphOp; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/tile.cpp b/src/plugins/intel_cpu/src/nodes/tile.cpp index 5ddcead71a6..52caa13b247 100644 --- a/src/plugins/intel_cpu/src/nodes/tile.cpp +++ b/src/plugins/intel_cpu/src/nodes/tile.cpp @@ -6,9 +6,12 @@ #include "common/cpu_memcpy.h" using namespace InferenceEngine; -using namespace ov::intel_cpu; -bool MKLDNNTileNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +namespace ov { +namespace intel_cpu { +namespace node { + +bool Tile::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!ov::is_type(op)) { errorMessage = "Only opset1 Tile operation is supported."; @@ -29,8 +32,8 @@ bool MKLDNNTileNode::isSupportedOperation(const std::shared_ptr& return true; } -MKLDNNTileNode::MKLDNNTileNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +Tile::Tile(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) : + Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -47,7 +50,7 @@ MKLDNNTileNode::MKLDNNTileNode(const std::shared_ptr& op, const mkldnn } } -void MKLDNNTileNode::getSupportedDescriptors() { +void Tile::getSupportedDescriptors() { if (getParentEdges().size() != 2) IE_THROW() << errorPrefix << " has incorrect number of input edges. 
" "Expected: 2, Actual: " << getParentEdges().size(); @@ -73,18 +76,18 @@ void MKLDNNTileNode::getSupportedDescriptors() { needPrepareParamsVar = true; } -void MKLDNNTileNode::initSupportedPrimitiveDescriptors() { +void Tile::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; supportedPrimitiveDescriptors = getSupportedConfigs(this); } -bool MKLDNNTileNode::needPrepareParams() const { +bool Tile::needPrepareParams() const { return needPrepareParamsVar; } -void MKLDNNTileNode::prepareParams() { +void Tile::prepareParams() { if (!constMap[TILE_REPEATS]) { const auto& repeatsMem = getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory(); @@ -104,7 +107,7 @@ void MKLDNNTileNode::prepareParams() { optimizedCase = prepareOptimizedParams(this, srcBlockedDims, dstBlockedDims); } -bool MKLDNNTileNode::needShapeInfer() const { +bool Tile::needShapeInfer() const { needPrepareParamsVar = true; if (inputShapesModified()) { return true; @@ -122,15 +125,15 @@ bool MKLDNNTileNode::needShapeInfer() const { return false; } -std::vector MKLDNNTileNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(TILE_REPEATS)); +std::vector Tile::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(TILE_REPEATS)); } -void MKLDNNTileNode::executeDynamicImpl(mkldnn::stream strm) { +void Tile::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNTileNode::execute(mkldnn::stream strm) { +void Tile::execute(mkldnn::stream strm) { if (optimizedCase) { optimizedExecute(getParentEdgeAt(TILE_INPUT)->getMemoryPtr(), getChildEdgeAt(0)->getMemoryPtr()); } else { @@ -138,7 +141,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { } } -void MKLDNNTileNode::plainExecute(mkldnn::stream strm) { +void Tile::plainExecute(mkldnn::stream strm) { if (noTiling) { return; } @@ -194,8 +197,10 @@ void MKLDNNTileNode::plainExecute(mkldnn::stream strm) { } } -bool MKLDNNTileNode::created() const { - return getType() == Tile; +bool Tile::created() const { + return getType() == Type::Tile; } -REG_MKLDNN_PRIM_FOR(MKLDNNTileNode, Tile); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/tile.h b/src/plugins/intel_cpu/src/nodes/tile.h index c1ab98f9cd2..a469229389d 100644 --- a/src/plugins/intel_cpu/src/nodes/tile.h +++ b/src/plugins/intel_cpu/src/nodes/tile.h @@ -10,10 +10,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNTileNode : public MKLDNNNode, public TileBroadcastCommon { +class Tile : public Node, public TileBroadcastCommon { public: - MKLDNNTileNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Tile(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -43,5 +44,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index f40343d49ed..be917c62cd1 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "emitters/jit_load_store_emitters.hpp" #include "ie_parallel.hpp" #include @@ -23,13 +23,16 @@ #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; using namespace mkldnn::impl; using namespace 
mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; using namespace Xbyak; +namespace ov { +namespace intel_cpu { +namespace node { + #define GET_OFF(field) offsetof(jit_topk_call_args, field) #define vmm_mask Vmm(0) @@ -98,7 +101,7 @@ struct jit_uni_topk_kernel_f32 : public jit_uni_topk_kernel, public jit_generato if (!shape_agnostic_alg) mov(reg_table, l_table); - data_type = MKLDNNExtensionUtils::IEPrecisionToDataType(jcp_.precision); + data_type = DnnlExtensionUtils::IEPrecisionToDataType(jcp_.precision); if (!shape_agnostic_alg && jcp_.layout == TopKLayoutType::topk_blocked && jcp_.topk_innermost) blk_stride = jcp_.sort_stride * jcp_.blk_size; @@ -1786,7 +1789,7 @@ private: } }; -bool MKLDNNTopKNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool TopK::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { const auto topKOp = ngraph::as_type_ptr(op); if (!topKOp) { @@ -1819,8 +1822,8 @@ bool MKLDNNTopKNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { +TopK::TopK(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "TopK layer with name '" + getName() + "'"; @@ -1869,9 +1872,9 @@ MKLDNNTopKNode::MKLDNNTopKNode(const std::shared_ptr& op, const mk } } -void MKLDNNTopKNode::getSupportedDescriptors() {} +void TopK::getSupportedDescriptors() {} -void MKLDNNTopKNode::initSupportedPrimitiveDescriptors() { +void TopK::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -1923,21 +1926,21 @@ void MKLDNNTopKNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNTopKNode::needShapeInfer() const { +bool TopK::needShapeInfer() const { const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; return inputShapesModified() || src_k != top_k; } -std::vector MKLDNNTopKNode::shapeInfer() const { - return MKLDNNNode::shapeInferGeneric(PortMask(1)); +std::vector TopK::shapeInfer() const { + return Node::shapeInferGeneric(PortMask(1)); } -bool MKLDNNTopKNode::needPrepareParams() const { +bool TopK::needPrepareParams() const { const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; return inputShapesModified() || top_k != src_k; } -void MKLDNNTopKNode::preset_params() { +void TopK::preset_params() { auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); if (srcMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { layout = TopKLayoutType::topk_ncsp; @@ -1948,8 +1951,8 @@ void MKLDNNTopKNode::preset_params() { } auto selectedPD = getSelectedPrimitiveDescriptor(); - auto data_type = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[TOPK_DATA].getMemDesc()->getPrecision()); - data_size = MKLDNNExtensionUtils::sizeOfDataType(data_type); + auto data_type = DnnlExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[TOPK_DATA].getMemDesc()->getPrecision()); + data_size = DnnlExtensionUtils::sizeOfDataType(data_type); topk_innermost = (layout == TopKLayoutType::topk_ncsp && axis == static_cast(getOutputShapeAtPort(TOPK_DATA).getRank() - 1)) || ((layout == TopKLayoutType::topk_nspc || layout == TopKLayoutType::topk_blocked) && axis == 1); @@ -1970,7 +1973,7 @@ void MKLDNNTopKNode::preset_params() { } } -void 
MKLDNNTopKNode::prepareParams() { +void TopK::prepareParams() { auto &dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -2047,7 +2050,7 @@ void MKLDNNTopKNode::prepareParams() { } } -void MKLDNNTopKNode::createPrimitive() { +void TopK::createPrimitive() { if (inputShapesDefined() && isExecutable()) { if (needPrepareParams()) prepareParams(); @@ -2106,11 +2109,11 @@ void MKLDNNTopKNode::createPrimitive() { } } -void MKLDNNTopKNode::executeDynamicImpl(mkldnn::stream strm) { +void TopK::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -void MKLDNNTopKNode::execute(mkldnn::stream strm) { +void TopK::execute(mkldnn::stream strm) { auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); auto &dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); auto &dstIndexesMemPtr = getChildEdgeAt(TOPK_INDEX)->getMemoryPtr(); @@ -2133,7 +2136,7 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) { } } -void MKLDNNTopKNode::topk_process(const uint8_t *in_ptr, uint8_t *out_ptr, uint8_t *out_idx_ptr) { +void TopK::topk_process(const uint8_t *in_ptr, uint8_t *out_ptr, uint8_t *out_idx_ptr) { uint8_t *process_ptr = vec_process_ptr.data(); uint8_t *process_idx_ptr = vec_process_idx_ptr.data(); @@ -2186,7 +2189,7 @@ void MKLDNNTopKNode::topk_process(const uint8_t *in_ptr, uint8_t *out_ptr, uint8 } } -inline void MKLDNNTopKNode::topk_kernel_process(const uint8_t *in_p, uint8_t *out_p, uint8_t *out_idx_p, +inline void TopK::topk_kernel_process(const uint8_t *in_p, uint8_t *out_p, uint8_t *out_idx_p, uint8_t *process_p, uint8_t *process_idx_p, size_t work_amount) { auto arg = jit_topk_call_args(); arg.src = static_cast(in_p); @@ -2205,7 +2208,7 @@ inline void MKLDNNTopKNode::topk_kernel_process(const uint8_t *in_p, uint8_t *ou (*topk_kernel)(&arg); } -inline void MKLDNNTopKNode::prepare_original_idx() { +inline void TopK::prepare_original_idx() { bool shape_agnostic_alg = algorithm == TopKAlgorithm::topk_heap_sort || (algorithm == TopKAlgorithm::topk_bubble_sort && !bubble_inplace); if (shape_agnostic_alg) { @@ -2255,7 +2258,7 @@ inline void MKLDNNTopKNode::prepare_original_idx() { // n: number of valid elements in bitonic sort // p: pow of 2 number, so that p/2 < n <= p // empty tail: p-n elements in the rear don't need sorting, -inline void MKLDNNTopKNode::bitonic_push_idx(int p, int n, std::vector &vec, int &cnt, bool cmp_val) { +inline void TopK::bitonic_push_idx(int p, int n, std::vector &vec, int &cnt, bool cmp_val) { // memory stride of adjacent elements in sorting int sort_stride = static_cast(I); cnt = 0; @@ -2299,7 +2302,7 @@ inline void MKLDNNTopKNode::bitonic_push_idx(int p, int n, std::vector &vec } } -void MKLDNNTopKNode::calc_bitonic_idx(size_t n, int &cnt, bool cmp_val) { +void TopK::calc_bitonic_idx(size_t n, int &cnt, bool cmp_val) { int m = n - 1; int log_p = 0; int p = 1; @@ -2324,7 +2327,7 @@ void MKLDNNTopKNode::calc_bitonic_idx(size_t n, int &cnt, bool cmp_val) { // O: total size of the outer dimensions // A: size of the topk imposed dimension // I: total size of the inner dimensions -void MKLDNNTopKNode::calc_dims_size(const SizeVector &layout_dims) { +void TopK::calc_dims_size(const SizeVector &layout_dims) { O = 1, I = 1; A = src_dims[axis]; int layout_axis = axis; @@ -2341,14 +2344,14 @@ void MKLDNNTopKNode::calc_dims_size(const SizeVector &layout_dims) { } } -void MKLDNNTopKNode::topk_ref(const float *in_ptr, float *out_ptr, int32_t *dst_idx) { +void 
TopK::topk_ref(const float *in_ptr, float *out_ptr, int32_t *dst_idx) { if (mode_max) topk_ref_process(in_ptr, out_ptr, dst_idx, src_dims, [](float x, float y)->float { return x > y; }); else topk_ref_process(in_ptr, out_ptr, dst_idx, src_dims, [](float x, float y)->float { return x < y; }); } -void MKLDNNTopKNode::topk_ref_process(const float* src_data, float* dst_data, int32_t* dst_idx, const SizeVector &in_dims, +void TopK::topk_ref_process(const float* src_data, float* dst_data, int32_t* dst_idx, const SizeVector &in_dims, std::function compare) const { int after_num = count(in_dims, axis + 1, in_dims.size()); @@ -2412,19 +2415,21 @@ void MKLDNNTopKNode::topk_ref_process(const float* src_data, float* dst_data, in }); } -inline int MKLDNNTopKNode::count(SizeVector dims, size_t start_ind, size_t end_ind) { +inline int TopK::count(SizeVector dims, size_t start_ind, size_t end_ind) { size_t count = 1; for (size_t i = start_ind; i < end_ind; i++) count *= dims[i]; return static_cast(count); } -inline int MKLDNNTopKNode::count(SizeVector dims, size_t start_ind) { +inline int TopK::count(SizeVector dims, size_t start_ind) { return count(dims, start_ind, dims.size()); } -bool MKLDNNTopKNode::created() const { - return getType() == TopK; +bool TopK::created() const { + return getType() == Type::TopK; } -REG_MKLDNN_PRIM_FOR(MKLDNNTopKNode, TopK); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/topk.h b/src/plugins/intel_cpu/src/nodes/topk.h index a0da1af8000..c8d012b4961 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.h +++ b/src/plugins/intel_cpu/src/nodes/topk.h @@ -12,6 +12,7 @@ namespace ov { namespace intel_cpu { +namespace node { enum TopKLayoutType { topk_ncsp, @@ -75,10 +76,10 @@ struct jit_uni_topk_kernel { jit_topk_config_params jcp_; }; -class MKLDNNTopKNode : public MKLDNNNode { +class TopK : public Node { public: - MKLDNNTopKNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNTopKNode() override = default; + TopK(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); + ~TopK() override = default; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -146,5 +147,6 @@ private: std::string errorPrefix; }; +} // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 2c3756105f6..051309a1198 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -7,13 +7,15 @@ #include #include -#include +#include #include using namespace mkldnn; -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { +namespace node { namespace { struct TransposeAsReorderKey { mkldnn::memory::desc src; @@ -40,7 +42,7 @@ bool TransposeAsReorderKey::operator==(const TransposeAsReorderKey& rhs) const { } } // namespace -bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool Transpose::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v1::Transpose::get_type_info_static())) { @@ -59,8 +61,8 @@ bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { 
+Transpose::Transpose(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache) + : Node(op, eng, cache) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -79,10 +81,10 @@ MKLDNNTransposeNode::MKLDNNTransposeNode(const std::shared_ptr& op, co } } -void MKLDNNTransposeNode::getSupportedDescriptors() { +void Transpose::getSupportedDescriptors() { } -void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { +void Transpose::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -133,17 +135,17 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { } } -bool MKLDNNTransposeNode::isExecutable() const { +bool Transpose::isExecutable() const { return !isInputTensorAtPortEmpty(0); } -bool MKLDNNTransposeNode::needPrepareParams() const { +bool Transpose::needPrepareParams() const { if (isOptimized) return false; return inputShapesModified(); } -void MKLDNNTransposeNode::prepareParams() { +void Transpose::prepareParams() { auto srcDesc = getParentEdgeAt(INPUT_DATA_IDX)->getMemory().GetDescWithType(); params.src_block_dims = srcDesc->getBlockDims(); auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); @@ -154,18 +156,18 @@ void MKLDNNTransposeNode::prepareParams() { const auto engine = getEngine(); auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); - MKLDNNMemoryPtr src_blocked = std::make_shared(engine); - MKLDNNMemoryPtr dst_blocked = std::make_shared(engine); + MemoryPtr src_blocked = std::make_shared(engine); + MemoryPtr dst_blocked = std::make_shared(engine); dst_blocked->Create( - MKLDNNExtensionUtils::makeDescriptor(dstMemPtr->GetDescWithType()->getDnnlDesc()), + DnnlExtensionUtils::makeDescriptor(dstMemPtr->GetDescWithType()->getDnnlDesc()), dstMemPtr->GetData(), false); const auto newDims = dst_blocked->getStaticDims(); - auto newDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(newDims), + auto newDesc = mkldnn::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), dst_blocked->GetDataType(), memory::format_tag::acdb); - src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(newDesc), srcMemPtr->GetData(), false); + src_blocked->Create(DnnlExtensionUtils::makeDescriptor(newDesc), srcMemPtr->GetData(), false); impl_desc_type impl_type = getSelectedPrimitiveDescriptor()->getImplementationType(); TransposeAsReorderKey key = {src_blocked->GetPrimitive().get_desc(), dst_blocked->GetPrimitive().get_desc()}; @@ -216,7 +218,7 @@ void MKLDNNTransposeNode::prepareParams() { execPtr = result.first; } -void MKLDNNTransposeNode::createPrimitive() { +void Transpose::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -252,7 +254,7 @@ void MKLDNNTransposeNode::createPrimitive() { } template -static void transpose_to_0312(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0312(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -277,7 +279,7 @@ static void transpose_to_0312(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MK } template -static void transpose_to_04123(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& 
dstMemPtr) { +static void transpose_to_04123(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -305,7 +307,7 @@ static void transpose_to_04123(const int MB, const MKLDNNMemoryPtr& srcMemPtr, M } template -static void transpose_to_051234(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_051234(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -336,7 +338,7 @@ static void transpose_to_051234(const int MB, const MKLDNNMemoryPtr& srcMemPtr, } template -void MKLDNNTransposeNode::optimizedExecute(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +void Transpose::optimizedExecute(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { switch (srcMemPtr->getStaticDims().size()) { case 4: transpose_to_0312(MB, srcMemPtr, dstMemPtr); @@ -352,7 +354,7 @@ void MKLDNNTransposeNode::optimizedExecute(const int MB, const MKLDNNMemoryPtr& } } -void MKLDNNTransposeNode::execute(mkldnn::stream strm) { +void Transpose::execute(mkldnn::stream strm) { if (prim) { (*prim).execute(strm, primArgs); } else if (execPtr) { @@ -372,15 +374,15 @@ void MKLDNNTransposeNode::execute(mkldnn::stream strm) { } } -void MKLDNNTransposeNode::executeDynamicImpl(mkldnn::stream strm) { +void Transpose::executeDynamicImpl(mkldnn::stream strm) { execute(strm); } -MKLDNNTransposeNode::TransposeJitExecutor::TransposeJitExecutor(const PermuteParams& params) { +Transpose::TransposeJitExecutor::TransposeJitExecutor(const PermuteParams& params) { pKernel = std::make_shared(params); } -void MKLDNNTransposeNode::TransposeJitExecutor::exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) { +void Transpose::TransposeJitExecutor::exec(Transpose* node, MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) { if (!pKernel) IE_THROW() << "Could not execute. 
Kernel for Transpose node was not compiled."; @@ -390,7 +392,7 @@ void MKLDNNTransposeNode::TransposeJitExecutor::exec(MKLDNNTransposeNode* node, pKernel->execute(srcData, dstData, MB); } -void MKLDNNTransposeNode::TransposeRefExecutor::exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) { +void Transpose::TransposeRefExecutor::exec(Transpose* node, MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) { const size_t dataSize = srcMemPtr->getDesc().getPrecision().size(); TransposeContext ctx = {node, srcMemPtr, dstMemPtr, MB}; OV_SWITCH(intel_cpu, TransposeOptimizedEmitter, ctx, dataSize, @@ -399,8 +401,10 @@ void MKLDNNTransposeNode::TransposeRefExecutor::exec(MKLDNNTransposeNode* node, OV_CASE(4, PrecisionTrait::value_type)); } -bool MKLDNNTransposeNode::created() const { - return getType() == Transpose; +bool Transpose::created() const { + return getType() == Type::Transpose; } -REG_MKLDNN_PRIM_FOR(MKLDNNTransposeNode, Transpose); +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h index b9397315f17..ee300ed5d23 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.h +++ b/src/plugins/intel_cpu/src/nodes/transpose.h @@ -13,10 +13,11 @@ namespace ov { namespace intel_cpu { +namespace node { -class MKLDNNTransposeNode : public MKLDNNNode { +class Transpose : public Node { public: - MKLDNNTransposeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + Transpose(const std::shared_ptr& op, const mkldnn::engine& eng, WeightsSharing::Ptr &cache); static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; @@ -42,7 +43,7 @@ protected: private: struct TransposeExecutor { TransposeExecutor() = default; - virtual void exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) = 0; + virtual void exec(Transpose* node, MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) = 0; virtual ~TransposeExecutor() = default; }; using executorPtr = std::shared_ptr; @@ -50,17 +51,17 @@ private: struct TransposeJitExecutor : public TransposeExecutor { TransposeJitExecutor(const PermuteParams& params); - void exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) override; + void exec(Transpose* node, MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) override; std::shared_ptr pKernel; }; struct TransposeRefExecutor : public TransposeExecutor { TransposeRefExecutor() = default; - void exec(MKLDNNTransposeNode* node, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr, const int MB) override; + void exec(Transpose* node, MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) override; }; - template void optimizedExecute(const int MB, const MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr); + template void optimizedExecute(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); InferenceEngine::SizeVector order; InferenceEngine::Precision prec; @@ -75,9 +76,9 @@ private: PermuteParams params; struct TransposeContext { - MKLDNNTransposeNode* nodePtr; - MKLDNNMemoryPtr srcMemPtr; - MKLDNNMemoryPtr dstMemPtr; + Transpose* nodePtr; + MemoryPtr srcMemPtr; + MemoryPtr dstMemPtr; int MB; }; @@ -96,5 +97,6 @@ private: bool performAsReorder = false; }; +} // namespace node } // namespace intel_cpu } // namespace ov 
diff --git a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp index 274c661cc4d..2e2b0990fab 100644 --- a/src/plugins/intel_cpu/src/nodes_factory.cpp +++ b/src/plugins/intel_cpu/src/nodes_factory.cpp @@ -86,96 +86,99 @@ #include "nodes/priorbox.h" #include "nodes/priorbox_clustered.h" -#define MKLDNN_NODE(__prim, __type) \ - registerNodeIfRequired(intel_cpu, __prim, __type, MKLDNNNodeImpl<__prim>) +#define INTEL_CPU_NODE(__prim, __type) \ + registerNodeIfRequired(intel_cpu, __prim, __type, NodeImpl<__prim>) -ov::intel_cpu::MKLDNNNode::NodesFactory::NodesFactory() +ov::intel_cpu::Node::NodesFactory::NodesFactory() : Factory("NodesFactory") { - MKLDNN_NODE(MKLDNNGenericNode, Generic); - MKLDNN_NODE(MKLDNNCumSumNode, CumSum); - MKLDNN_NODE(MKLDNNConvolutionNode, Convolution); - MKLDNN_NODE(MKLDNNSpaceToBatchNode, SpaceToBatch); - MKLDNN_NODE(MKLDNNLrnNode, Lrn); - MKLDNN_NODE(MKLDNNBatchToSpaceNode, BatchToSpace); - MKLDNN_NODE(MKLDNNNormalizeL2Node, NormalizeL2); - MKLDNN_NODE(MKLDNNIfNode, If); - MKLDNN_NODE(MKLDNNProposalNode, Proposal); - MKLDNN_NODE(MKLDNNBroadcastNode, Broadcast); - MKLDNN_NODE(MKLDNNExperimentalDetectronTopKROIsNode, ExperimentalDetectronTopKROIs); - MKLDNN_NODE(MKLDNNReorderNode, Reorder); - MKLDNN_NODE(MKLDNNBinaryConvolutionNode, BinaryConvolution); - MKLDNN_NODE(MKLDNNMatrixNmsNode, MatrixNms); - MKLDNN_NODE(MKLDNNAdaptivePoolingNode, AdaptivePooling); - MKLDNN_NODE(MKLDNNPoolingNode, Pooling); - MKLDNN_NODE(MKLDNNReduceNode, Reduce); - MKLDNN_NODE(MKLDNNEltwiseNode, Eltwise); - MKLDNN_NODE(MKLDNNSoftMaxNode, Softmax); - MKLDNN_NODE(MKLDNNEmbeddingBagPackedSumNode, EmbeddingBagPackedSum); - MKLDNN_NODE(MKLDNNInputNode, Input); - MKLDNN_NODE(MKLDNNInputNode, Output); - MKLDNN_NODE(MKLDNNMemoryInputNode, MemoryInput); - MKLDNN_NODE(MKLDNNMemoryOutputNode, MemoryOutput); - MKLDNN_NODE(MKLDNNTileNode, Tile); - MKLDNN_NODE(MKLDNNDFTNode, DFT); - MKLDNN_NODE(MKLDNNGatherTreeNode, GatherTree); - MKLDNN_NODE(MKLDNNSpaceToDepthNode, SpaceToDepth); - MKLDNN_NODE(MKLDNNFullyConnectedNode, FullyConnected); - MKLDNN_NODE(MKLDNNCTCGreedyDecoderNode, CTCGreedyDecoder); - MKLDNN_NODE(MKLDNNTransposeNode, Transpose); - MKLDNN_NODE(MKLDNNDeformableConvolutionNode, DeformableConvolution); - MKLDNN_NODE(MKLDNNReorgYoloNode, ReorgYolo); - MKLDNN_NODE(MKLDNNEmbeddingSegmentsSumNode, EmbeddingSegmentsSum); - MKLDNN_NODE(MKLDNNSelectNode, Select); - MKLDNN_NODE(MKLDNNShapeOfNode, ShapeOf); - MKLDNN_NODE(MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode, ExperimentalDetectronGenerateProposalsSingleImage); - MKLDNN_NODE(MKLDNNReverseSequenceNode, ReverseSequence); - MKLDNN_NODE(MKLDNNFakeQuantizeNode, FakeQuantize); - MKLDNN_NODE(MKLDNNNonMaxSuppressionNode, NonMaxSuppression); - MKLDNN_NODE(MKLDNNExperimentalDetectronPriorGridGeneratorNode, ExperimentalDetectronPriorGridGenerator); - MKLDNN_NODE(MKLDNNGatherNDNode, GatherND); - MKLDNN_NODE(MKLDNNLogSoftmaxNode, LogSoftmax); - MKLDNN_NODE(MKLDNNPSROIPoolingNode, PSROIPooling); - MKLDNN_NODE(MKLDNNRNN, RNNCell); - MKLDNN_NODE(MKLDNNRNN, RNNSeq); - MKLDNN_NODE(MKLDNNCTCLossNode, CTCLoss); - MKLDNN_NODE(MKLDNNSplitNode, Split); - MKLDNN_NODE(MKLDNNDetectionOutputNode, DetectionOutput); - MKLDNN_NODE(MKLDNNGatherElementsNode, GatherElements); - MKLDNN_NODE(MKLDNNCTCGreedyDecoderSeqLenNode, CTCGreedyDecoderSeqLen); - MKLDNN_NODE(MKLDNNBucketizeNode, Bucketize); - MKLDNN_NODE(MKLDNNExperimentalDetectronROIFeatureExtractorNode, ExperimentalDetectronROIFeatureExtractor); - 
MKLDNN_NODE(MKLDNNMathNode, Math); - MKLDNN_NODE(MKLDNNMultiClassNmsNode, MulticlassNms); - MKLDNN_NODE(MKLDNNConvertNode, Convert); - MKLDNN_NODE(MKLDNNEmbeddingBagOffsetSumNode, EmbeddingBagOffsetsSum); - MKLDNN_NODE(MKLDNNRollNode, Roll); - MKLDNN_NODE(MKLDNNPadNode, Pad); - MKLDNN_NODE(MKLDNNReshapeNode, Reshape); - MKLDNN_NODE(MKLDNNMVNNode, MVN); - MKLDNN_NODE(MKLDNNMatMulNode, MatMul); - MKLDNN_NODE(MKLDNNScatterUpdateNode, ScatterUpdate); - MKLDNN_NODE(MKLDNNScatterUpdateNode, ScatterElementsUpdate); - MKLDNN_NODE(MKLDNNScatterUpdateNode, ScatterNDUpdate); - MKLDNN_NODE(MKLDNNInterpolateNode, Interpolate); - MKLDNN_NODE(MKLDNNROIPoolingNode, ROIPooling); - MKLDNN_NODE(MKLDNNTensorIteratorNode, TensorIterator); - MKLDNN_NODE(MKLDNNConcatNode, Concatenation); - MKLDNN_NODE(MKLDNNExtractImagePatchesNode, ExtractImagePatches); - MKLDNN_NODE(MKLDNNOneHotNode, OneHot); - MKLDNN_NODE(MKLDNNExperimentalDetectronDetectionOutputNode, ExperimentalDetectronDetectionOutput); - MKLDNN_NODE(MKLDNNROIAlignNode, ROIAlign); - MKLDNN_NODE(MKLDNNShuffleChannelsNode, ShuffleChannels); - MKLDNN_NODE(MKLDNNDepthToSpaceNode, DepthToSpace); - MKLDNN_NODE(MKLDNNDeconvolutionNode, Deconvolution); - MKLDNN_NODE(MKLDNNGatherNode, Gather); - MKLDNN_NODE(MKLDNNRegionYoloNode, RegionYolo); - MKLDNN_NODE(MKLDNNRangeNode, Range); - MKLDNN_NODE(MKLDNNTopKNode, TopK); - MKLDNN_NODE(MKLDNNStridedSliceNode, StridedSlice); - MKLDNN_NODE(MKLDNNGRNNode, GRN); - MKLDNN_NODE(MKLDNNNonZeroNode, NonZero); - MKLDNN_NODE(MKLDNNSnippetNode, Subgraph); - MKLDNN_NODE(MKLDNNColorConvertNode, ColorConvert); - MKLDNN_NODE(MKLDNNPriorBoxNode, PriorBox); - MKLDNN_NODE(MKLDNNPriorBoxClusteredNode, PriorBoxClustered); + using namespace node; + INTEL_CPU_NODE(Generic, Type::Generic); + INTEL_CPU_NODE(CumSum, Type::CumSum); + INTEL_CPU_NODE(Convolution, Type::Convolution); + INTEL_CPU_NODE(SpaceToBatch, Type::SpaceToBatch); + INTEL_CPU_NODE(Lrn, Type::Lrn); + INTEL_CPU_NODE(BatchToSpace, Type::BatchToSpace); + INTEL_CPU_NODE(NormalizeL2, Type::NormalizeL2); + INTEL_CPU_NODE(If, Type::If); + INTEL_CPU_NODE(Proposal, Type::Proposal); + INTEL_CPU_NODE(Broadcast, Type::Broadcast); + INTEL_CPU_NODE(ExperimentalDetectronTopKROIs, Type::ExperimentalDetectronTopKROIs); + INTEL_CPU_NODE(Reorder, Type::Reorder); + INTEL_CPU_NODE(BinaryConvolution, Type::BinaryConvolution); + INTEL_CPU_NODE(MatrixNms, Type::MatrixNms); + INTEL_CPU_NODE(AdaptivePooling, Type::AdaptivePooling); + INTEL_CPU_NODE(Pooling, Type::Pooling); + INTEL_CPU_NODE(Reduce, Type::Reduce); + INTEL_CPU_NODE(Eltwise, Type::Eltwise); + INTEL_CPU_NODE(SoftMax, Type::Softmax); + INTEL_CPU_NODE(EmbeddingBagPackedSum, Type::EmbeddingBagPackedSum); + INTEL_CPU_NODE(node::Input, Type::Input); + INTEL_CPU_NODE(node::Input, Type::Output); + INTEL_CPU_NODE(MemoryInput, Type::MemoryInput); + INTEL_CPU_NODE(MemoryOutput, Type::MemoryOutput); + INTEL_CPU_NODE(Tile, Type::Tile); + INTEL_CPU_NODE(DFT, Type::DFT); + INTEL_CPU_NODE(GatherTree, Type::GatherTree); + INTEL_CPU_NODE(SpaceToDepth, Type::SpaceToDepth); + INTEL_CPU_NODE(FullyConnected, Type::FullyConnected); + INTEL_CPU_NODE(CTCGreedyDecoder, Type::CTCGreedyDecoder); + INTEL_CPU_NODE(Transpose, Type::Transpose); + INTEL_CPU_NODE(DeformableConvolution, Type::DeformableConvolution); + INTEL_CPU_NODE(ReorgYolo, Type::ReorgYolo); + INTEL_CPU_NODE(EmbeddingSegmentsSum, Type::EmbeddingSegmentsSum); + INTEL_CPU_NODE(Select, Type::Select); + INTEL_CPU_NODE(ShapeOf, Type::ShapeOf); + INTEL_CPU_NODE(ExperimentalDetectronGenerateProposalsSingleImage, 
Type::ExperimentalDetectronGenerateProposalsSingleImage); + INTEL_CPU_NODE(ReverseSequence, Type::ReverseSequence); + INTEL_CPU_NODE(FakeQuantize, Type::FakeQuantize); + INTEL_CPU_NODE(NonMaxSuppression, Type::NonMaxSuppression); + INTEL_CPU_NODE(ExperimentalDetectronPriorGridGenerator, Type::ExperimentalDetectronPriorGridGenerator); + INTEL_CPU_NODE(GatherND, Type::GatherND); + INTEL_CPU_NODE(LogSoftmax, Type::LogSoftmax); + INTEL_CPU_NODE(PSROIPooling, Type::PSROIPooling); + INTEL_CPU_NODE(RNN, Type::RNNCell); + INTEL_CPU_NODE(RNN, Type::RNNSeq); + INTEL_CPU_NODE(CTCLoss, Type::CTCLoss); + INTEL_CPU_NODE(Split, Type::Split); + INTEL_CPU_NODE(DetectionOutput, Type::DetectionOutput); + INTEL_CPU_NODE(GatherElements, Type::GatherElements); + INTEL_CPU_NODE(CTCGreedyDecoderSeqLen, Type::CTCGreedyDecoderSeqLen); + INTEL_CPU_NODE(Bucketize, Type::Bucketize); + INTEL_CPU_NODE(ExperimentalDetectronROIFeatureExtractor, Type::ExperimentalDetectronROIFeatureExtractor); + INTEL_CPU_NODE(Math, Type::Math); + INTEL_CPU_NODE(MultiClassNms, Type::MulticlassNms); + INTEL_CPU_NODE(Convert, Type::Convert); + INTEL_CPU_NODE(EmbeddingBagOffsetSum, Type::EmbeddingBagOffsetsSum); + INTEL_CPU_NODE(Roll, Type::Roll); + INTEL_CPU_NODE(Pad, Type::Pad); + INTEL_CPU_NODE(Reshape, Type::Reshape); + INTEL_CPU_NODE(MVN, Type::MVN); + INTEL_CPU_NODE(MatMul, Type::MatMul); + INTEL_CPU_NODE(ScatterUpdate, Type::ScatterUpdate); + INTEL_CPU_NODE(ScatterUpdate, Type::ScatterElementsUpdate); + INTEL_CPU_NODE(ScatterUpdate, Type::ScatterNDUpdate); + INTEL_CPU_NODE(Interpolate, Type::Interpolate); + INTEL_CPU_NODE(ROIPooling, Type::ROIPooling); + INTEL_CPU_NODE(TensorIterator, Type::TensorIterator); + INTEL_CPU_NODE(Concat, Type::Concatenation); + INTEL_CPU_NODE(ExtractImagePatches, Type::ExtractImagePatches); + INTEL_CPU_NODE(OneHot, Type::OneHot); + INTEL_CPU_NODE(ExperimentalDetectronDetectionOutput, Type::ExperimentalDetectronDetectionOutput); + INTEL_CPU_NODE(ROIAlign, Type::ROIAlign); + INTEL_CPU_NODE(ShuffleChannels, Type::ShuffleChannels); + INTEL_CPU_NODE(DepthToSpace, Type::DepthToSpace); + INTEL_CPU_NODE(Deconvolution, Type::Deconvolution); + INTEL_CPU_NODE(Gather, Type::Gather); + INTEL_CPU_NODE(RegionYolo, Type::RegionYolo); + INTEL_CPU_NODE(Range, Type::Range); + INTEL_CPU_NODE(TopK, Type::TopK); + INTEL_CPU_NODE(StridedSlice, Type::StridedSlice); + INTEL_CPU_NODE(GRN, Type::GRN); + INTEL_CPU_NODE(NonZero, Type::NonZero); + INTEL_CPU_NODE(Snippet, Type::Subgraph); + INTEL_CPU_NODE(ColorConvert, Type::ColorConvert); + INTEL_CPU_NODE(PriorBox, Type::PriorBox); + INTEL_CPU_NODE(PriorBoxClustered, Type::PriorBoxClustered); } + +#undef INTEL_CPU_NODE diff --git a/src/plugins/intel_cpu/src/normalize_preprocess.cpp b/src/plugins/intel_cpu/src/normalize_preprocess.cpp index 22f04fc5ab8..eae3fd1bd9f 100644 --- a/src/plugins/intel_cpu/src/normalize_preprocess.cpp +++ b/src/plugins/intel_cpu/src/normalize_preprocess.cpp @@ -7,9 +7,11 @@ #include "nodes/common/cpu_memcpy.h" #include "utils/general_utils.h" -using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace ov { +namespace intel_cpu { + NormalizePreprocess::NormalizePreprocess() : meanBuffer(nullptr) { } @@ -41,7 +43,7 @@ void NormalizePreprocess::Load(const Shape& inputShape, InputInfo::Ptr inputInfo } break; case MEAN_IMAGE: { - // since MKLDNN expects all channels in the same buffer - we copy it here as it comes from different channels... 
+ // since oneDNN expects all channels in the same buffer - we copy it here as it comes from different channels... auto meanWidth = pp[0]->meanData->getTensorDesc().getDims()[pp[0]->meanData->getTensorDesc().getDims().size() - 1]; auto meanHeight = pp[0]->meanData->getTensorDesc().getDims()[pp[0]->meanData->getTensorDesc().getDims().size() - 2]; @@ -119,3 +121,6 @@ void NormalizePreprocess::NormalizeImage(const Shape &inputShape, float *input, IE_THROW() << "Preprocessing error: meanValues and stdScales arrays are inconsistent."; } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 66970677b9d..b18089ec941 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -132,11 +131,13 @@ #include -using namespace ov::intel_cpu; using namespace InferenceEngine; #define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: " +namespace ov { +namespace intel_cpu { + static std::string getDeviceFullName() { std::string brand_string; #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) @@ -162,7 +163,7 @@ static std::string getDeviceFullName() { Engine::Engine() : deviceFullName(getDeviceFullName()) { _pluginName = "CPU"; - extensionManager->AddExtension(std::make_shared()); + extensionManager->AddExtension(std::make_shared()); } Engine::~Engine() { @@ -341,13 +342,13 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr pass_config->set_callback( [](const_node_ptr &node) -> bool { std::string errorMessage; - return MKLDNNMVNNode::isSupportedOperation(node, errorMessage); + return node::MVN::isSupportedOperation(node, errorMessage); }); pass_config->set_callback( [](const_node_ptr &node) -> bool { std::string errorMsg; - return MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg); + return node::NormalizeL2::isSupportedOperation(node, errorMsg); }); pass_config->enable(); @@ -413,7 +414,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr ngraph::pass::MulFakeQuantizeFusion, ngraph::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool { std::string errMsg; - return !MKLDNNFakeQuantizeNode::isSupportedOperation(node, errMsg); + return !node::FakeQuantize::isSupportedOperation(node, errMsg); }); pass_config->set_callback([&defaultPrecisions](const_node_ptr &node) -> bool { @@ -429,7 +430,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr using namespace ngraph::pass::low_precision; if (useLpt) { - OV_ITT_SCOPE(FIRST_INFERENCE, ov::intel_cpu::itt::domains::intel_cpu_LT, "LowPrecisionTransformations"); + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations"); auto supportedPrecisions = std::vector({ OperationPrecisionRestriction::create({ @@ -495,7 +496,7 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { std::string errMsg; - return MKLDNNFakeQuantizeNode::isSupportedOperation(node, errMsg); + return node::FakeQuantize::isSupportedOperation(node, errMsg); }); postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation @@ -703,7 +704,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std conf.batchLimit = 
static_cast(network.getBatchSize()); } - return std::make_shared(clonedNetwork, conf, extensionManager, weightsSharing, shared_from_this()); + return std::make_shared(clonedNetwork, conf, extensionManager, weightsSharing, shared_from_this()); } void Engine::SetConfig(const std::map &config) { @@ -895,7 +896,7 @@ void Engine::AddExtension(const InferenceEngine::IExtensionPtr& extension) { QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map& config) const { QueryNetworkResult res; - MKLDNNWeightsSharing::Ptr fake_w_cache; + WeightsSharing::Ptr fake_w_cache; auto function = network.getFunction(); if (function != nullptr) { std::unordered_set originalOps; @@ -923,9 +924,9 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::ma std::unordered_set unsupported; for (auto op : ops) { auto layerIsSupported = [&] { - std::unique_ptr ptr; + std::unique_ptr ptr; try { - ptr.reset(MKLDNNNode::factory().create(op, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache)); + ptr.reset(Node::factory().create(op, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache)); } catch (InferenceEngine::Exception&) { return false; } @@ -1000,7 +1001,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr conf.batchLimit = static_cast(cnnnetwork.getBatchSize()); } - auto execNetwork = std::make_shared(cnnnetwork, conf, extensionManager, weightsSharing, shared_from_this()); + auto execNetwork = std::make_shared(cnnnetwork, conf, extensionManager, weightsSharing, shared_from_this()); execNetwork->setNetworkInputs(cnnnetwork.getInputsInfo()); execNetwork->setNetworkOutputs(cnnnetwork.getOutputsInfo()); @@ -1009,5 +1010,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr return execNetwork; } +} // namespace intel_cpu +} // namespace ov + +using namespace ov::intel_cpu; static const Version version = {{2, 1}, CI_BUILD_NUMBER, "openvino_intel_cpu_plugin"}; IE_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version) diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index f2534834d79..84964646c3d 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -52,7 +52,7 @@ private: Config engConfig; NumaNodesWeights weightsSharing; - MKLDNNExtensionManager::Ptr extensionManager = std::make_shared(); + ExtensionManager::Ptr extensionManager = std::make_shared(); /* Explicily configured streams have higher priority even than performance hints. So track if streams is set explicitly (not auto-configured) */ bool streamsExplicitlySetForEngine = false; diff --git a/src/plugins/intel_cpu/src/primitive.cpp b/src/plugins/intel_cpu/src/primitive.cpp index 26f579a8474..e5d263fc4a1 100644 --- a/src/plugins/intel_cpu/src/primitive.cpp +++ b/src/plugins/intel_cpu/src/primitive.cpp @@ -5,23 +5,27 @@ #include #include "primitive.h" -using namespace ov::intel_cpu; +namespace ov { +namespace intel_cpu { -MKLDNNPrimitive::MKLDNNPrimitive() {} +Primitive::Primitive() {} -MKLDNNPrimitive::operator bool() const { +Primitive::operator bool() const { return prim ? 
true : false; } -mkldnn::primitive MKLDNNPrimitive::operator*() { +mkldnn::primitive Primitive::operator*() { return *prim; } -void MKLDNNPrimitive::reset(mkldnn::primitive* primitive) { +void Primitive::reset(mkldnn::primitive* primitive) { prim.reset(primitive); } -MKLDNNPrimitive &MKLDNNPrimitive::operator=(const std::shared_ptr& primitive) { +Primitive &Primitive::operator=(const std::shared_ptr& primitive) { prim = primitive; return *this; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/primitive.h b/src/plugins/intel_cpu/src/primitive.h index daf3695105a..fe157597fe5 100644 --- a/src/plugins/intel_cpu/src/primitive.h +++ b/src/plugins/intel_cpu/src/primitive.h @@ -13,11 +13,11 @@ namespace ov { namespace intel_cpu { -class MKLDNNPrimitive { +class Primitive { public: - MKLDNNPrimitive(); + Primitive(); operator bool() const; - MKLDNNPrimitive& operator=(const std::shared_ptr& primitive); + Primitive& operator=(const std::shared_ptr& primitive); mkldnn::primitive operator*(); void reset(mkldnn::primitive* primitive); diff --git a/src/plugins/intel_cpu/src/serialize.cpp b/src/plugins/intel_cpu/src/serialize.cpp index e334e85e020..27e4e9a5451 100644 --- a/src/plugins/intel_cpu/src/serialize.cpp +++ b/src/plugins/intel_cpu/src/serialize.cpp @@ -67,7 +67,7 @@ namespace { } }; // namespace -CNNNetworkSerializer::CNNNetworkSerializer(std::ostream & ostream, MKLDNNExtensionManager::Ptr extensionManager) +CNNNetworkSerializer::CNNNetworkSerializer(std::ostream & ostream, ExtensionManager::Ptr extensionManager) : _ostream(ostream) , _extensionManager(extensionManager) { } diff --git a/src/plugins/intel_cpu/src/serialize.h b/src/plugins/intel_cpu/src/serialize.h index b8261ff5f23..e558db6d38b 100644 --- a/src/plugins/intel_cpu/src/serialize.h +++ b/src/plugins/intel_cpu/src/serialize.h @@ -13,12 +13,12 @@ namespace intel_cpu { class CNNNetworkSerializer { public: - CNNNetworkSerializer(std::ostream & ostream, MKLDNNExtensionManager::Ptr extensionManager); + CNNNetworkSerializer(std::ostream & ostream, ExtensionManager::Ptr extensionManager); void operator << (const InferenceEngine::CNNNetwork & network); private: std::ostream & _ostream; - MKLDNNExtensionManager::Ptr _extensionManager; + ExtensionManager::Ptr _extensionManager; }; class CNNNetworkDeserializer { diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.cpp b/src/plugins/intel_cpu/src/utils/blob_dump.cpp index 565eeeb9972..a45da04ecce 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.cpp +++ b/src/plugins/intel_cpu/src/utils/blob_dump.cpp @@ -5,7 +5,7 @@ #include "blob_dump.h" #include "blob_factory.hpp" #include -#include +#include #include #include "common/memory_desc_wrapper.hpp" @@ -85,7 +85,7 @@ static DnnlBlockedMemoryDesc parse_header(IEB_HEADER &header) { return DnnlBlockedMemoryDesc{prc, Shape(dims)}; } -void BlobDumper::prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const { +void BlobDumper::prepare_plain_data(const MemoryPtr &memory, std::vector &data) const { const auto &desc = memory->getDesc(); size_t data_size = desc.getShape().getElementsCount(); const auto size = data_size * desc.getPrecision().size(); diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.h b/src/plugins/intel_cpu/src/utils/blob_dump.h index e99bca74beb..dd77c438e8f 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.h +++ b/src/plugins/intel_cpu/src/utils/blob_dump.h @@ -21,21 +21,21 @@ namespace intel_cpu { * NB! Channel is a second dimension for all blob types. 
*/ class BlobDumper { - MKLDNNMemoryPtr memory; + MemoryPtr memory; - void prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const; + void prepare_plain_data(const MemoryPtr &memory, std::vector &data) const; public: BlobDumper() = default; BlobDumper(const DnnlBlockedMemoryDesc &desc) { mkldnn::engine eng(mkldnn::engine::kind::cpu, 0); - memory = std::make_shared(eng); + memory = std::make_shared(eng); memory->Create(desc); } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; - explicit BlobDumper(const MKLDNNMemoryPtr &_memory) : memory(_memory) {} + explicit BlobDumper(const MemoryPtr &_memory) : memory(_memory) {} static BlobDumper read(const std::string &file_path); static BlobDumper read(std::istream &stream); diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.cpp b/src/plugins/intel_cpu/src/utils/node_dumper.cpp index 6c0618974ee..7956e46af68 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.cpp +++ b/src/plugins/intel_cpu/src/utils/node_dumper.cpp @@ -26,7 +26,7 @@ static void formatNodeName(std::string& name) { std::replace(name.begin(), name.end(), ':', '-'); } -static bool shouldBeDumped(const MKLDNNNodePtr& node, const Config& config, const std::string& portsKind) { +static bool shouldBeDumped(const NodePtr& node, const Config& config, const std::string& portsKind) { const auto& dumpFilters = config.blobDumpFilters; if (dumpFilters.empty()) @@ -94,7 +94,7 @@ static void dump(const BlobDumper& bd, const std::string& file, const Config& co } } -static void dumpInternalBlobs(const MKLDNNNodePtr& node, const Config& config) { +static void dumpInternalBlobs(const NodePtr& node, const Config& config) { std::string nodeName = node->getName(); formatNodeName(nodeName); @@ -109,14 +109,14 @@ static void dumpInternalBlobs(const MKLDNNNodePtr& node, const Config& config) { if (desc.getPrecision() == Precision::BIN) continue; - MKLDNNMemoryPtr memory = std::make_shared(node->getEngine()); + MemoryPtr memory = std::make_shared(node->getEngine()); memory->Create(MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer()); BlobDumper dumper(memory); dump(dumper, dump_file, config); } } -void dumpInputBlobs(const MKLDNNNodePtr& node, const Config& config, int count) { +void dumpInputBlobs(const NodePtr& node, const Config& config, int count) { if (!shouldBeDumped(node, config, "IN")) return; @@ -150,7 +150,7 @@ void dumpInputBlobs(const MKLDNNNodePtr& node, const Config& config, int count) dumpInternalBlobs(node, config); } -void dumpOutputBlobs(const MKLDNNNodePtr& node, const Config& config, int count) { +void dumpOutputBlobs(const NodePtr& node, const Config& config, int count) { if (!shouldBeDumped(node, config, "OUT")) return; diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.h b/src/plugins/intel_cpu/src/utils/node_dumper.h index 98243786a28..4c7adf2dd0f 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.h +++ b/src/plugins/intel_cpu/src/utils/node_dumper.h @@ -10,16 +10,16 @@ namespace ov { namespace intel_cpu { -void dumpInputBlobs(const MKLDNNNodePtr &node, const Config& config, int count = -1); -void dumpOutputBlobs(const MKLDNNNodePtr &node, const Config& config, int count = -1); +void dumpInputBlobs(const NodePtr &node, const Config& config, int count = -1); +void dumpOutputBlobs(const NodePtr &node, const Config& config, int count = -1); class DumpHelper { - const MKLDNNNodePtr& node; + const NodePtr& node; const int count; const Config& config; public: - explicit DumpHelper(const 
MKLDNNNodePtr& _node, const Config& _config, int _count = -1): node(_node), config(_config), count(_count) { + explicit DumpHelper(const NodePtr& _node, const Config& _config, int _count = -1): node(_node), config(_config), count(_count) { dumpInputBlobs(node, config, count); } diff --git a/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.cpp b/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.cpp index ada2499cd41..ce644bc8968 100644 --- a/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.cpp +++ b/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.cpp @@ -9,30 +9,33 @@ #include using namespace ngraph; -using namespace ov; -MKLDNNInputMemoryFormats::~MKLDNNInputMemoryFormats() = default; +namespace ov { +namespace intel_cpu { -std::string ngraph::getMKLDNNInputMemoryFormats(const std::shared_ptr& node) { - auto it_info = node->get_rt_info().find(MKLDNNInputMemoryFormats::get_type_info_static()); +InputMemoryFormats::~InputMemoryFormats() = default; + +std::string getInputMemoryFormats(const std::shared_ptr& node) { + auto it_info = node->get_rt_info().find(InputMemoryFormats::get_type_info_static()); if (it_info != node->get_rt_info().end()) { - if (it_info->second.is()) { - return it_info->second.as().getMemoryFormats(); + if (it_info->second.is()) { + return it_info->second.as().getMemoryFormats(); } } return {}; } -MKLDNNOutputMemoryFormats::~MKLDNNOutputMemoryFormats() = default; +OutputMemoryFormats::~OutputMemoryFormats() = default; -std::string ngraph::getMKLDNNOutputMemoryFormats(const std::shared_ptr& node) { - auto it_info = node->get_rt_info().find(MKLDNNOutputMemoryFormats::get_type_info_static()); +std::string getOutputMemoryFormats(const std::shared_ptr& node) { + auto it_info = node->get_rt_info().find(OutputMemoryFormats::get_type_info_static()); if (it_info != node->get_rt_info().end()) { - if (it_info->second.is()) { - return it_info->second.as().getMemoryFormats(); + if (it_info->second.is()) { + return it_info->second.as().getMemoryFormats(); } } return {}; } - +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.hpp b/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.hpp index 62948fffbbd..796a5e1f041 100644 --- a/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.hpp +++ b/src/plugins/intel_cpu/src/utils/rt_info/memory_formats_attribute.hpp @@ -10,19 +10,20 @@ #include #include -namespace ngraph { +namespace ov { +namespace intel_cpu { -constexpr const char *MKLDNNInputMemoryFormatsAttr = "MKLDNNInputMemoryFormats"; -constexpr const char *MKLDNNOutputMemoryFormatsAttr = "MKLDNNOutputMemoryFormats"; +constexpr const char *InputMemoryFormatsAttr = "InputMemoryFormats"; +constexpr const char *OutputMemoryFormatsAttr = "OutputMemoryFormats"; template -class MKLDNNMemoryFormats : public ov::RuntimeAttribute { +class MemoryFormats : public ov::RuntimeAttribute { protected: std::string memory_format; public: - MKLDNNMemoryFormats() = default; - explicit MKLDNNMemoryFormats(const std::string &_memory_format) : memory_format(_memory_format) {} + MemoryFormats() = default; + explicit MemoryFormats(const std::string &_memory_format) : memory_format(_memory_format) {} std::string getMemoryFormats() const { return memory_format; } ov::Any merge(const ngraph::NodeVector & nodes) const override { @@ -53,23 +54,25 @@ public: }; -class MKLDNNInputMemoryFormats : public MKLDNNMemoryFormats { +class InputMemoryFormats : public 
MemoryFormats { public: - OPENVINO_RTTI(MKLDNNInputMemoryFormatsAttr); - MKLDNNInputMemoryFormats() = default; - explicit MKLDNNInputMemoryFormats(const std::string &_memory_format) : MKLDNNMemoryFormats(_memory_format) {} - ~MKLDNNInputMemoryFormats() override; + OPENVINO_RTTI(InputMemoryFormatsAttr); + InputMemoryFormats() = default; + explicit InputMemoryFormats(const std::string &_memory_format) : MemoryFormats(_memory_format) {} + ~InputMemoryFormats() override; }; -std::string getMKLDNNInputMemoryFormats(const std::shared_ptr& node); +std::string getInputMemoryFormats(const std::shared_ptr& node); -class MKLDNNOutputMemoryFormats : public MKLDNNMemoryFormats { +class OutputMemoryFormats : public MemoryFormats { public: - OPENVINO_RTTI(MKLDNNOutputMemoryFormatsAttr); - MKLDNNOutputMemoryFormats() = default; - explicit MKLDNNOutputMemoryFormats(const std::string &_memory_format) : MKLDNNMemoryFormats(_memory_format) {} - ~MKLDNNOutputMemoryFormats() override; + OPENVINO_RTTI(OutputMemoryFormatsAttr); + OutputMemoryFormats() = default; + explicit OutputMemoryFormats(const std::string &_memory_format) : MemoryFormats(_memory_format) {} + ~OutputMemoryFormats() override; }; -std::string getMKLDNNOutputMemoryFormats(const std::shared_ptr& node); -} // namespace ngraph +std::string getOutputMemoryFormats(const std::shared_ptr& node); + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp index 11378e53be2..4b15fc58382 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp @@ -81,9 +81,12 @@ #include "variadic_split_shape_inference.hpp" #include "matmul_shape_inference.hpp" +namespace ov { +namespace intel_cpu { + void shape_inference(ov::Node* op, - const std::vector& input_shapes, - std::vector& output_shapes, + const std::vector& input_shapes, + std::vector& output_shapes, const std::map>& constant_data) { auto shapeInfer = make_shape_inference(op->shared_from_this()); output_shapes = shapeInfer->infer(input_shapes, constant_data); @@ -124,11 +127,11 @@ class entryIO : public entryBase { public: using entryBase::entryBase; - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); shape_infer(op, input_shapes, output_shapes); return output_shapes; } @@ -139,11 +142,11 @@ class entryIOC : public entryBase { public: using entryBase::entryBase; - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); shape_infer(op, input_shapes, output_shapes, constant_data); return output_shapes; } @@ -153,11 +156,11 @@ class entryCopy : public entryBase { public: using entryBase::entryBase; - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = node.get(); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); copy_shape_infer(op, input_shapes, 
output_shapes); return output_shapes; } @@ -167,11 +170,11 @@ class entryFirstPassthrough : public entryBase { public: using entryBase::entryBase; - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = node.get(); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); first_input_passthrough_infer(op, input_shapes, output_shapes); return output_shapes; } @@ -181,11 +184,11 @@ class entryEltwise : public entryBase { public: using entryBase::entryBase; - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = node.get(); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); eltwise_shape_infer(op, input_shapes, output_shapes); return output_shapes; } @@ -212,11 +215,11 @@ public: virtual void post_validate_and_infer_types(const std::shared_ptr& local_op) {} - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = node.get(); - std::vector output_shapes; + std::vector output_shapes; std::shared_ptr local_op; if (!constant_data.empty()) { @@ -265,7 +268,7 @@ public: OPENVINO_ASSERT(false, errorMessage.str()); } - output_shapes[i] = ov::StaticShape(partial_shape.to_shape()); + output_shapes[i] = StaticShape(partial_shape.to_shape()); } post_validate_and_infer_types(local_op); @@ -313,12 +316,12 @@ class entryInterpolate : public entryBase { public: using entryBase::entryBase; - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { std::vector pads_begin, pads_end; auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); correct_pads_attr(op, pads_begin, pads_end, input_shapes); shape_infer(op, pads_begin, pads_end, input_shapes, output_shapes, constant_data); return output_shapes; @@ -335,11 +338,11 @@ public: const ov::CoordinateDiff& get_pads_end() override { return pads_end; } - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); bool status = resolve_auto_pad_for_shape(op, pads_begin, pads_end, input_shapes, 2, is_grouped ? 
3 : 2); OPENVINO_ASSERT(status, "Convolution shape inference doesn't have enough information to calculate static shapes"); @@ -362,14 +365,14 @@ public: const ov::CoordinateDiff& get_pads_end() override { return pads_end; } - std::vector infer( - const std::vector& input_shapes, + std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) override { - ov::StaticShape output_shape_input; + StaticShape output_shape_input; auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); + std::vector output_shapes(op->get_output_size()); if (op->get_input_size() == 3) - get_data_as_shape(2, op, output_shape_input, constant_data); + get_data_as_shape(2, op, output_shape_input, constant_data); bool status = resolve_auto_pad_for_shape_back_prop(op, pads_begin, pads_end, @@ -561,3 +564,6 @@ std::shared_ptr make_shape_inference(const std::shared_ptr(op); } } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.hpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.hpp index ee41c707061..3940c901083 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.hpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.hpp @@ -10,15 +10,18 @@ #include "static_shape.hpp" +namespace ov { +namespace intel_cpu { + void shape_inference(ov::Node* op, - const std::vector& input_shapes, - std::vector& output_shapes, + const std::vector& input_shapes, + std::vector& output_shapes, const std::map>& constant_data = {}); class IShapeInfer { public: - virtual std::vector infer( - const std::vector& input_shapes, + virtual std::vector infer( + const std::vector& input_shapes, const std::map>& constant_data) = 0; // infer may generate padding as by-product, these APIs is designed to retrieve them back @@ -29,3 +32,6 @@ public: }; std::shared_ptr make_shape_inference(const std::shared_ptr& op); + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.cpp index 86a74b0915b..7b5bfa8bfd6 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.cpp @@ -4,9 +4,10 @@ #include "static_dimension.hpp" -using namespace ov; +namespace ov { +namespace intel_cpu { -std::ostream& ov::operator<<(std::ostream& str, const StaticDimension& dimension) { +std::ostream& operator<<(std::ostream& str, const StaticDimension& dimension) { return str << dimension.get_length(); } @@ -102,3 +103,6 @@ StaticDimension::value_type StaticDimension::get_max_length() const { StaticDimension::value_type StaticDimension::get_min_length() const { return m_dimension; } + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.hpp b/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.hpp index 52496bfa9d6..13b65bc7598 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.hpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/static_dimension.hpp @@ -13,6 +13,8 @@ #include "openvino/core/except.hpp" namespace ov { +namespace intel_cpu { + /// \brief Class representing a dimension, which must be static, /// in a shape or shape-like object. 
/// @@ -74,4 +76,6 @@ private: }; std::ostream& operator<<(std::ostream& str, const StaticDimension& dimension); -} // namespace ov + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.cpp index 9f0ce98541f..a592b86ebe9 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.cpp @@ -3,31 +3,33 @@ // #include "static_shape.hpp" -using namespace ov; -ov::StaticShape::StaticShape(std::vector dimensions) +namespace ov { +namespace intel_cpu { + +StaticShape::StaticShape(std::vector dimensions) : std::vector(std::move(dimensions)) {} -ov::StaticShape::StaticShape(const std::vector& dimensions) +StaticShape::StaticShape(const std::vector& dimensions) : std::vector(dimensions.begin(), dimensions.end()) {} -ov::StaticShape::StaticShape(std::initializer_list init) +StaticShape::StaticShape(std::initializer_list init) : std::vector(init.begin(), init.end()) {} -ov::Shape ov::StaticShape::get_max_shape() const { +ov::Shape StaticShape::get_max_shape() const { return (*this).to_shape(); } -ov::Shape ov::StaticShape::get_min_shape() const { +ov::Shape StaticShape::get_min_shape() const { return (*this).to_shape(); } -ov::Shape ov::StaticShape::get_shape() const { +ov::Shape StaticShape::get_shape() const { return (*this).to_shape(); } -ov::StaticShape ov::operator+(const StaticShape& s1, const StaticShape& s2) { +StaticShape operator+(const StaticShape& s1, const StaticShape& s2) { if (s1.size() != s2.size()) { throw std::invalid_argument("rank mismatch"); } @@ -38,7 +40,7 @@ ov::StaticShape ov::operator+(const StaticShape& s1, const StaticShape& s2) { return result; } -std::ostream& ov::operator<<(std::ostream& str, const StaticShape& shape) { +std::ostream& operator<<(std::ostream& str, const StaticShape& shape) { str << "{"; bool first = true; for (const auto& d : shape) { @@ -49,7 +51,7 @@ std::ostream& ov::operator<<(std::ostream& str, const StaticShape& shape) { return (str << "}"); } -bool ov::StaticShape::compatible(const StaticShape& s) const { +bool StaticShape::compatible(const StaticShape& s) const { if (size() != s.size()) return false; for (size_t i = 0; i < size(); ++i) @@ -58,7 +60,7 @@ bool ov::StaticShape::compatible(const StaticShape& s) const { return true; } -bool ov::StaticShape::same_scheme(const StaticShape& s) const { +bool StaticShape::same_scheme(const StaticShape& s) const { if (size() != s.size()) return false; for (size_t i = 0; i < size(); ++i) @@ -67,7 +69,7 @@ bool ov::StaticShape::same_scheme(const StaticShape& s) const { return true; } -bool ov::StaticShape::merge_rank(Rank r) { +bool StaticShape::merge_rank(Rank r) { if (r.is_dynamic()) { return true; } else { @@ -75,7 +77,7 @@ bool ov::StaticShape::merge_rank(Rank r) { } } -ov::Shape ov::StaticShape::to_shape() const { +ov::Shape StaticShape::to_shape() const { std::vector shape_dimensions(size()); std::transform(begin(), end(), shape_dimensions.begin(), [](const StaticDimension& d) { return d.get_length(); @@ -83,7 +85,7 @@ ov::Shape ov::StaticShape::to_shape() const { return shape_dimensions; } -ov::PartialShape ov::StaticShape::to_partial_shape() const { +ov::PartialShape StaticShape::to_partial_shape() const { ov::PartialShape shape_dimensions = PartialShape::dynamic(size()); std::transform(begin(), end(), shape_dimensions.begin(), [](const StaticDimension& d) { return d.get_length(); @@ 
-91,7 +93,7 @@ ov::PartialShape ov::StaticShape::to_partial_shape() const { return shape_dimensions; } -bool ov::StaticShape::merge_into(StaticShape& dst, const StaticShape& src) { +bool StaticShape::merge_into(StaticShape& dst, const StaticShape& src) { if (dst.size() != src.size()) return false; bool success = true; @@ -100,9 +102,9 @@ bool ov::StaticShape::merge_into(StaticShape& dst, const StaticShape& src) { return success; } -bool ov::StaticShape::broadcast_merge_into(StaticShape& dst, - const StaticShape& src, - const ngraph::op::AutoBroadcastSpec& autob) { +bool StaticShape::broadcast_merge_into(StaticShape& dst, + const StaticShape& src, + const ngraph::op::AutoBroadcastSpec& autob) { switch (autob.m_type) { case ngraph::op::AutoBroadcastType::NONE: return true; @@ -158,7 +160,7 @@ bool ov::StaticShape::broadcast_merge_into(StaticShape& dst, return false; } -bool ov::StaticShape::operator==(const StaticShape& shape) const { +bool StaticShape::operator==(const StaticShape& shape) const { if (size() != shape.size()) return false; for (auto i = 0; i < size(); ++i) @@ -167,6 +169,9 @@ bool ov::StaticShape::operator==(const StaticShape& shape) const { return true; } -bool ov::StaticShape::operator!=(const StaticShape& partial_shape) const { +bool StaticShape::operator!=(const StaticShape& partial_shape) const { return !(*this == partial_shape); -} \ No newline at end of file +} + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.hpp b/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.hpp index 2726a7c1702..b4efbcdfdc4 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.hpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/static_shape.hpp @@ -17,7 +17,9 @@ namespace ov { namespace op { struct AutoBroadcastSpec; -} +} // namespace op + +namespace intel_cpu { /// \brief Class representing a shape that must be totally static. 
class StaticShape : public std::vector { @@ -41,7 +43,7 @@ public: bool refines(const StaticShape& s) const; bool merge_rank(Rank r); - Shape to_shape() const; + ov::Shape to_shape() const; PartialShape to_partial_shape() const; friend std::ostream& operator<<(std::ostream& str, const StaticShape& shape); @@ -62,4 +64,6 @@ public: StaticShape operator+(const StaticShape& s1, const StaticShape& s2); std::ostream& operator<<(std::ostream& str, const StaticShape& shape); -} // namespace ov + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/verbose.cpp b/src/plugins/intel_cpu/src/utils/verbose.cpp index 5d06c232d66..df2e515c5ba 100644 --- a/src/plugins/intel_cpu/src/utils/verbose.cpp +++ b/src/plugins/intel_cpu/src/utils/verbose.cpp @@ -25,7 +25,7 @@ bool Verbose::shouldBePrinted() const { return false; if (node->isConstant() || - node->getType() == Input || node->getType() == Output) + node->getType() == Type::Input || node->getType() == Type::Output) return false; return true; } @@ -144,7 +144,7 @@ void Verbose::printInfo() { std::string nodeImplementer = "cpu"; if (node->prim) nodeImplementer = "dnnl"; // oneDNN - else if (node->getType() == Reference) + else if (node->getType() == Type::Reference) nodeImplementer = "ngraph_ref"; // ngraph reference const std::string& nodeName = colorize(GREEN, node->getName()); diff --git a/src/plugins/intel_cpu/src/utils/verbose.h b/src/plugins/intel_cpu/src/utils/verbose.h index bb2ac904da6..a3a159ec20d 100644 --- a/src/plugins/intel_cpu/src/utils/verbose.h +++ b/src/plugins/intel_cpu/src/utils/verbose.h @@ -16,7 +16,7 @@ namespace intel_cpu { class Verbose { public: - Verbose(const MKLDNNNodePtr& _node, const std::string& _lvl) + Verbose(const NodePtr& _node, const std::string& _lvl) : node(_node), lvl(atoi(_lvl.c_str())) { if (!shouldBePrinted()) return; @@ -32,7 +32,7 @@ public: } private: - const MKLDNNNodePtr& node; + const NodePtr& node; const int lvl; std::stringstream stream; diff --git a/src/plugins/intel_cpu/src/weights_cache.cpp b/src/plugins/intel_cpu/src/weights_cache.cpp index 14453a26d33..1779347a0e5 100644 --- a/src/plugins/intel_cpu/src/weights_cache.cpp +++ b/src/plugins/intel_cpu/src/weights_cache.cpp @@ -10,35 +10,35 @@ namespace ov { namespace intel_cpu { -const SimpleDataHash MKLDNNWeightsSharing::simpleCRC; +const SimpleDataHash WeightsSharing::simpleCRC; -MKLDNNWeightsSharing::MKLDNNSharedMemory::MKLDNNSharedMemory( +WeightsSharing::SharedMemory::SharedMemory( std::unique_lock && lock, - const MKLDNNMemoryInfo::Ptr & memory, - MKLDNNMemoryPtr newPtr) + const MemoryInfo::Ptr & memory, + MemoryPtr newPtr) : lock(std::move(lock)) , memory(memory) , newPtr(newPtr) {} -MKLDNNWeightsSharing::MKLDNNSharedMemory::operator MKLDNNMemoryPtr() const { +WeightsSharing::SharedMemory::operator MemoryPtr() const { return memory->sharedMemory.lock(); } -bool MKLDNNWeightsSharing::MKLDNNSharedMemory::isValid() const { +bool WeightsSharing::SharedMemory::isValid() const { return memory->valid.load(std::memory_order_acquire); } -void MKLDNNWeightsSharing::MKLDNNSharedMemory::valid(bool b) { +void WeightsSharing::SharedMemory::valid(bool b) { memory->valid.store(b, std::memory_order_release); } -MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::findOrCreate( +WeightsSharing::SharedMemory::Ptr WeightsSharing::findOrCreate( const std::string& key, - std::function create, + std::function create, bool valid) { - MKLDNNMemoryInfo::Ptr ptr; - MKLDNNMemoryPtr newPtr; + MemoryInfo::Ptr ptr; + 
MemoryPtr newPtr; { std::unique_lock lock(guard); auto found = sharedWeights.find(key); @@ -46,18 +46,18 @@ MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::findOrCreate if (found == sharedWeights.end() || !((ptr = found->second) && (newPtr = ptr->sharedMemory.lock()))) { newPtr = create(); - ptr = std::make_shared(newPtr, valid); + ptr = std::make_shared(newPtr, valid); sharedWeights[key] = ptr; } } - return std::make_shared(ptr->valid.load(std::memory_order_relaxed) + return std::make_shared(ptr->valid.load(std::memory_order_relaxed) ? std::unique_lock(ptr->guard, std::defer_lock) : std::unique_lock(ptr->guard), ptr, newPtr); } -MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::get(const std::string& key) const { - MKLDNNMemoryInfo::Ptr ptr; - MKLDNNMemoryPtr newPtr; +WeightsSharing::SharedMemory::Ptr WeightsSharing::get(const std::string& key) const { + MemoryInfo::Ptr ptr; + MemoryPtr newPtr; { std::unique_lock lock(guard); auto found = sharedWeights.find(key); @@ -66,24 +66,24 @@ MKLDNNWeightsSharing::MKLDNNSharedMemory::Ptr MKLDNNWeightsSharing::get(const st || !((ptr = found->second) && (newPtr = ptr->sharedMemory.lock()))) IE_THROW() << "Unknown shared memory with key " << key; } - return std::make_shared(ptr->valid.load(std::memory_order_relaxed) + return std::make_shared(ptr->valid.load(std::memory_order_relaxed) ? std::unique_lock(ptr->guard, std::defer_lock) : std::unique_lock(ptr->guard), ptr, newPtr); } NumaNodesWeights::NumaNodesWeights() { for (auto numa_id : InferenceEngine::getAvailableNUMANodes()) - _cache_map[numa_id] = std::make_shared(); + _cache_map[numa_id] = std::make_shared(); } -MKLDNNWeightsSharing::Ptr& NumaNodesWeights::operator[](int numa_id) { +WeightsSharing::Ptr& NumaNodesWeights::operator[](int numa_id) { auto found = _cache_map.find(numa_id); if (found == _cache_map.end()) IE_THROW() << "Unknown numa node id " << numa_id; return found->second; } -const MKLDNNWeightsSharing::Ptr& NumaNodesWeights::operator[](int numa_id) const { +const WeightsSharing::Ptr& NumaNodesWeights::operator[](int numa_id) const { auto found = _cache_map.find(numa_id); if (found == _cache_map.end()) IE_THROW() << "Unknown numa node id " << numa_id; diff --git a/src/plugins/intel_cpu/src/weights_cache.hpp b/src/plugins/intel_cpu/src/weights_cache.hpp index 6cacd1b6e08..1615234dde9 100644 --- a/src/plugins/intel_cpu/src/weights_cache.hpp +++ b/src/plugins/intel_cpu/src/weights_cache.hpp @@ -17,7 +17,7 @@ // TODO: While CPU plugin has no ease way to clone graph object we use weight // caching in global Engine context to avoid tensor memory duplication. // For same cases it may be switched of (like for single stream execution) -// When MKLDNNGraph clone function will be ready you may removed this +// When Graph clone function will be ready you may removed this // classes at all. 
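
The weight-sharing store touched below is essentially a thread-safe find-or-create map from a content hash to a weak reference on the shared Memory object, so identical constant tensors are materialized only once per NUMA node. A minimal sketch of that pattern follows; the types SharedBlob and WeightsCache are hypothetical placeholders for illustration, not the plugin's real classes:

    #include <functional>
    #include <memory>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    // Hypothetical stand-in for the plugin's shared Memory object.
    struct SharedBlob { /* tensor storage */ };

    class WeightsCache {
    public:
        // Returns the cached blob for `key`, creating it via `create` on a miss.
        std::shared_ptr<SharedBlob> findOrCreate(
                const std::string& key,
                const std::function<std::shared_ptr<SharedBlob>()>& create) {
            std::lock_guard<std::mutex> lock(guard);
            auto it = cache.find(key);
            if (it != cache.end()) {
                if (auto alive = it->second.lock())   // still referenced by another graph
                    return alive;
            }
            auto fresh = create();                    // materialize the weights once
            cache[key] = fresh;                       // keep only a weak reference
            return fresh;
        }

    private:
        std::mutex guard;
        std::unordered_map<std::string, std::weak_ptr<SharedBlob>> cache;
    };

The plugin's WeightsSharing additionally carries a per-entry validity flag and hands back a lock object so that the first consumer can fill the buffer before others read it; the sketch only covers the shared-ownership part.
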
namespace ov { @@ -48,57 +48,57 @@ protected: }; /** - * Caching store of MKLDNNMemory objects + * Caching store of Memory objects * Will return a cached object or create new one * * Is a thread safe */ -class MKLDNNWeightsSharing { - struct MKLDNNMemoryInfo { - typedef std::shared_ptr Ptr; +class WeightsSharing { + struct MemoryInfo { + typedef std::shared_ptr Ptr; - MKLDNNMemoryInfo(MKLDNNMemoryPtr memoryPtr, bool valid) + MemoryInfo(MemoryPtr memoryPtr, bool valid) : sharedMemory(memoryPtr) , valid(valid) {} std::mutex guard; - std::weak_ptr sharedMemory; + std::weak_ptr sharedMemory; std::atomic valid; }; public: - typedef std::shared_ptr Ptr; + typedef std::shared_ptr Ptr; - class MKLDNNSharedMemory { + class SharedMemory { public: - typedef std::shared_ptr Ptr; + typedef std::shared_ptr Ptr; - MKLDNNSharedMemory(std::unique_lock && lock, - const MKLDNNMemoryInfo::Ptr & memory, - MKLDNNMemoryPtr newPtr = nullptr); + SharedMemory(std::unique_lock && lock, + const MemoryInfo::Ptr & memory, + MemoryPtr newPtr = nullptr); - operator MKLDNNMemoryPtr() const; + operator MemoryPtr() const; bool isValid() const; void valid(bool b); private: std::unique_lock lock; - MKLDNNMemoryInfo::Ptr memory; - MKLDNNMemoryPtr newPtr; + MemoryInfo::Ptr memory; + MemoryPtr newPtr; }; - MKLDNNSharedMemory::Ptr findOrCreate(const std::string& key, - std::function create, - bool valid = true); + SharedMemory::Ptr findOrCreate(const std::string& key, + std::function create, + bool valid = true); - MKLDNNSharedMemory::Ptr get(const std::string& key) const; + SharedMemory::Ptr get(const std::string& key) const; static const SimpleDataHash& GetHashFunc () { return simpleCRC; } protected: mutable std::mutex guard; - std::unordered_map sharedWeights; + std::unordered_map sharedWeights; static const SimpleDataHash simpleCRC; }; @@ -111,11 +111,11 @@ class NumaNodesWeights { public: NumaNodesWeights(); - MKLDNNWeightsSharing::Ptr& operator[](int i); - const MKLDNNWeightsSharing::Ptr& operator[](int i) const; + WeightsSharing::Ptr& operator[](int i); + const WeightsSharing::Ptr& operator[](int i) const; private: - std::map _cache_map; + std::map _cache_map; }; } // namespace intel_cpu diff --git a/src/tests/functional/inference_engine/ngraph_reader/strided_slice_tests.cpp b/src/tests/functional/inference_engine/ngraph_reader/strided_slice_tests.cpp index 0d010ba6735..68efeab1a5a 100644 --- a/src/tests/functional/inference_engine/ngraph_reader/strided_slice_tests.cpp +++ b/src/tests/functional/inference_engine/ngraph_reader/strided_slice_tests.cpp @@ -4,7 +4,7 @@ #include #include "ngraph_reader_tests.hpp" -// MKLDNN: "Crop supports only 2d, 4d and 5d blobs." +// Crop supports only 2d, 4d and 5d blobs // This test should pass after deleting // "input_shape.size() != 2 && input_shape.size() != 4 && input_shape.size() != 5" condition in // strided_slice_to_crop transformation @@ -158,7 +158,7 @@ TEST_F(NGraphReaderTests, ConvertStridedSliceToCrop) { }); } -// MKLDNN: "Crop supports only 2d, 4d and 5d blobs." 
+// Crop supports only 2d, 4d and 5d blobs // This test should pass after deleting // "input_shape.size() != 2 && input_shape.size() != 4 && input_shape.size() != 5" condition in // strided_slice_to_crop transformation diff --git a/src/tests/functional/inference_engine/serialization/single_layer/mvn.cpp b/src/tests/functional/inference_engine/serialization/single_layer/mvn.cpp index 7c15c1b5f5d..619f216fc87 100644 --- a/src/tests/functional/inference_engine/serialization/single_layer/mvn.cpp +++ b/src/tests/functional/inference_engine/serialization/single_layer/mvn.cpp @@ -40,10 +40,10 @@ TEST_P(Mvn1LayerTest, Serialize) { Serialize(); } -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_across_channels, Mvn1LayerTest, MvnAcrossChannels, +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsMVN_across_channels, Mvn1LayerTest, MvnAcrossChannels, Mvn1LayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_reduction_axes, Mvn1LayerTest, MvnReductionAxes, +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsMVN_reduction_axes, Mvn1LayerTest, MvnReductionAxes, Mvn1LayerTest::getTestCaseName); // ------------------- MVN-6 ------------------------------------------------- diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp index 2e0525bbb0e..a5ffe833104 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp @@ -105,11 +105,11 @@ const auto testCasesAxis_6 = ::testing::Combine( ::testing::Values(CommonTestUtils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_negative_axis, CumSumLayerTest, testCasesNegativeAxis, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_0, CumSumLayerTest, testCasesAxis_0, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_1, CumSumLayerTest, testCasesAxis_1, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_2, CumSumLayerTest, testCasesAxis_2, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_3, CumSumLayerTest, testCasesAxis_3, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_4, CumSumLayerTest, testCasesAxis_4, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_5, CumSumLayerTest, testCasesAxis_5, CumSumLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsCumSum_axis_6, CumSumLayerTest, testCasesAxis_6, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_negative_axis, CumSumLayerTest, testCasesNegativeAxis, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_0, CumSumLayerTest, testCasesAxis_0, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_1, CumSumLayerTest, testCasesAxis_1, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_2, CumSumLayerTest, testCasesAxis_2, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_3, CumSumLayerTest, testCasesAxis_3, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_4, CumSumLayerTest, testCasesAxis_4, CumSumLayerTest::getTestCaseName); 
+INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_5, CumSumLayerTest, testCasesAxis_5, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsCumSum_axis_6, CumSumLayerTest, testCasesAxis_6, CumSumLayerTest::getTestCaseName); diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/dft.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/dft.cpp index e879e773b77..67da318fdb7 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/dft.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/dft.cpp @@ -101,7 +101,7 @@ const auto testCase4D = ::testing::Combine( ); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsDFT_1d, DFTLayerTest, testCase1D, DFTLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsDFT_2d, DFTLayerTest, testCase2D, DFTLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsDFT_3d, DFTLayerTest, testCase3D, DFTLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsDFT_4d, DFTLayerTest, testCase4D, DFTLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsDFT_1d, DFTLayerTest, testCase1D, DFTLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsDFT_2d, DFTLayerTest, testCase2D, DFTLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsDFT_3d, DFTLayerTest, testCase3D, DFTLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsDFT_4d, DFTLayerTest, testCase4D, DFTLayerTest::getTestCaseName); diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp index 92b67665939..73e89200223 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mvn.cpp @@ -69,9 +69,9 @@ const auto MvnReductionAxes = ::testing::Combine( ::testing::Values(CommonTestUtils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_AcrossChannels, Mvn1LayerTest, MvnAcrossChannels, Mvn1LayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsMVN_AcrossChannels, Mvn1LayerTest, MvnAcrossChannels, Mvn1LayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsMVN_ReductionAxes, Mvn1LayerTest, MvnReductionAxes, Mvn1LayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsMVN_ReductionAxes, Mvn1LayerTest, MvnReductionAxes, Mvn1LayerTest::getTestCaseName); std::vector idxPrecisions = { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp index ffd02632c63..c97d71b25ab 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/roll.cpp @@ -84,13 +84,21 @@ const auto testCaseUnordNegAxesAndShifts10D = ::testing::Combine( ::testing::Values(CommonTestUtils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_2d_zero_shifts, RollLayerTest, testCase2DZeroShifts, RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_1d, RollLayerTest, testCase1D, RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_2d, RollLayerTest, testCase2D, 
RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_3d, RollLayerTest, testCase3D, RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_4d, RollLayerTest, testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_negative_unordered_axes_5d, RollLayerTest, testCaseRepeatingAxes5D, RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_negative_shifts_6d, RollLayerTest, testCaseNegativeShifts6D, RollLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_2d_zero_shifts, RollLayerTest, + testCase2DZeroShifts, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_1d, RollLayerTest, + testCase1D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_2d, RollLayerTest, + testCase2D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_3d, RollLayerTest, + testCase3D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_negative_unordered_axes_4d, RollLayerTest, + testCaseNegativeUnorderedAxes4D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_negative_unordered_axes_5d, RollLayerTest, + testCaseRepeatingAxes5D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_negative_shifts_6d, RollLayerTest, + testCaseNegativeShifts6D, RollLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsRoll_unord_neg_shifts_and_axes_10d, RollLayerTest, + testCaseUnordNegAxesAndShifts10D, RollLayerTest::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp index 36c6adcc035..717f86f54f1 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/select.cpp @@ -81,6 +81,6 @@ const auto numpyCases = ::testing::Combine( ::testing::Values(CommonTestUtils::DEVICE_CPU) ); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsSelect_none, SelectLayerTest, noneCases, SelectLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsSelect_none, SelectLayerTest, noneCases, SelectLayerTest::getTestCaseName); -INSTANTIATE_TEST_SUITE_P(smoke_MKLDNN_TestsSelect_numpy, SelectLayerTest, numpyCases, SelectLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_INTEL_CPU_TestsSelect_numpy, SelectLayerTest, numpyCases, SelectLayerTest::getTestCaseName); diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/strided_slice.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/strided_slice.cpp index 13e81cf060c..d59f984f7c7 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/strided_slice.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/strided_slice.cpp @@ -119,7 +119,7 @@ std::vector ss_only_test_cases = { }; INSTANTIATE_TEST_SUITE_P( - smoke_MKLDNN, StridedSliceLayerTest, + smoke_INTEL_CPU, StridedSliceLayerTest, ::testing::Combine( 
::testing::ValuesIn(ss_only_test_cases), ::testing::Values(InferenceEngine::Precision::FP32), diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp index 34291d4306d..28a25cc0087 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp @@ -92,7 +92,7 @@ protected: if (inputDynamicShapes.size() == 2 && inputDynamicShapes[0][0].is_dynamic() && inputDynamicShapes[1][0].is_dynamic()) throw std::runtime_error("Invalid test case. If 3rd input is constant, batch dimension must be static."); - // Method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // Method MemoryDesc::isSame can't correct compute layout for tensor with strides = 1 // returned output format always tnc if (inFmts.size() == 2 && (inputDynamicShapes[0][0].is_static() && inputDynamicShapes[0][0].get_length() == 1 || inputDynamicShapes[1].is_static() && ov::shape_size(inputDynamicShapes[1].to_shape()) == 1)) { @@ -140,7 +140,7 @@ protected: direction, seqMode); - // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // method MemoryDesc::isSame can't correct compute layout for tensor with strides = 1 // returned output format always tnc if (gruSequenceOp->get_output_partial_shape(0).is_static() && ov::shape_size(gruSequenceOp->get_output_shape(0)) == 1) { outFmts[0] = tnc; diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp index cc3c3db39c6..4fd95d4fb27 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp @@ -93,7 +93,7 @@ protected: const size_t hiddenSize = targetStaticShapes.front()[1][2]; const size_t numDirections = direction == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 
2 : 1; - // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // method MemoryDesc::isSame can't correct compute layout for tensor with strides = 1 // returned output format always tnc if (inFmts.size() >= 3) { for (size_t i = 1; i < 3; i++) { @@ -140,7 +140,7 @@ protected: direction, seqMode); - // method MKLDNNMemoryDesc::isSame can't correct compute layout for tensor with strides = 1 + // method MemoryDesc::isSame can't correct compute layout for tensor with strides = 1 // returned output format always tnc if (outFmts.size() >= 3) { for (size_t i = 1; i < 3; i++) { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp index 49f6a649d1f..c076ec9af74 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp @@ -142,7 +142,7 @@ protected: const auto& inShapeA = inputDynamicShapes[0]; const auto& inShapeB = inputDynamicShapes[1]; - // see comment in MKLDNNMatMulNode::canFuse + // see comment in MatMul::canFuse if (!(nodeType == MatMulNodeType::MatMul && std::get<0>(fusingParams) && std::get<0>(fusingParams)->getFusedOpsNames().find("(PerChannel)") != std::string::npos && std::max(inShapeA.size(), inShapeB.size()) > 2)) diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp index f2a0ab6611a..ad190d1f48b 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/add_convert_to_reorder.cpp @@ -57,7 +57,7 @@ namespace { Parameter[FP32] Constant[I8] \ / \ / - \ Reorder[I32] (Is inserted by the MKLDNNGraph) + \ Reorder[I32] (Is inserted by the Graph) \ / Gather[FP32] | diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/gather_add_avgpool.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/gather_add_avgpool.cpp index b5801787ec9..13aa7b47d7b 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/gather_add_avgpool.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/gather_add_avgpool.cpp @@ -21,7 +21,7 @@ using namespace ngraph; the first one (nhwc) is picked instead of nchw. Subgraph's layout also affects AvgPool layout and for this node also nhwc is picked instead of more preferable nChw8c or nChw16c. - To address the issue, there is a WA in MKLDNNGraph::Replicate - we skip propagating + To address the issue, there is a WA in intel_cpu::Graph::Replicate - we skip propagating input's precision if its child has Subgraph consumers. Same scenario happens when we have Eltwise instead of Subgraph - to be addressed in #78939. 
*/ diff --git a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index ce4a4edd8cc..2eaa43c05fb 100644 --- a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -267,12 +267,12 @@ CPUTestsBase::makeCPUInfo(const std::vector& inFmts, CPUInfo cpuInfo; if (!inFmts.empty()) { - cpuInfo.insert({ngraph::MKLDNNInputMemoryFormats::get_type_info_static(), - ngraph::MKLDNNInputMemoryFormats(fmts2str(inFmts, "cpu:"))}); + cpuInfo.insert({ov::intel_cpu::InputMemoryFormats::get_type_info_static(), + ov::intel_cpu::InputMemoryFormats(fmts2str(inFmts, "cpu:"))}); } if (!outFmts.empty()) { - cpuInfo.insert({ngraph::MKLDNNOutputMemoryFormats::get_type_info_static(), - ngraph::MKLDNNOutputMemoryFormats(fmts2str(outFmts, "cpu:"))}); + cpuInfo.insert({ov::intel_cpu::OutputMemoryFormats::get_type_info_static(), + ov::intel_cpu::OutputMemoryFormats(fmts2str(outFmts, "cpu:"))}); } if (!priority.empty()) { cpuInfo.insert({"PrimitivesPriority", impls2str(priority)}); diff --git a/src/tests/ie_tsan.supp b/src/tests/ie_tsan.supp index dac948d0379..4b2afe1ff49 100644 --- a/src/tests/ie_tsan.supp +++ b/src/tests/ie_tsan.supp @@ -1,16 +1,10 @@ # ThreadSanitizer suppression file for Inference Engine # global const variables from CPU plugin -race:^ov::intel_cpu::MKLDNNActivationNode::initializers[abi:cxx11]$ race:^ov::intel_cpu::type_to_name_tbl[abi:cxx11]$ -race:^ov::intel_cpu::MKLDNNPermuteNode::OptimizedCases$ # global variable from CPU plugin which is initialized during plugin loading -race:^ov::intel_cpu::MKLDNNNode::GetNodesHolder()::localHolder$ -race:^InferenceEngine::Extensions::Cpu::MKLDNNExtensions<(mkldnn::impl::cpu::cpu_isa_t)3>::GetExtensionsHolder()::localHolder$ -race:^InferenceEngine::Extensions::Cpu::MKLDNNExtensions<(mkldnn::impl::cpu::cpu_isa_t)2>::GetExtensionsHolder()::localHolder$ -race:^InferenceEngine::Extensions::Cpu::MKLDNNExtensions<(mkldnn::impl::cpu::cpu_isa_t)1>::GetExtensionsHolder()::localHolder$ -race:^InferenceEngine::Extensions::Cpu::MKLDNNExtensions<(mkldnn::impl::cpu::cpu_isa_t)0>::GetExtensionsHolder()::localHolder$ +race:^ov::intel_cpu::Node::GetNodesHolder()::localHolder$ # global const variable from pugixml race:^pugi::impl::(anonymous namespace)::dummy_node_set$ diff --git a/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp b/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp index c2f625b06c6..5d28cd0d768 100644 --- a/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp +++ b/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp @@ -9,7 +9,7 @@ #include #include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/common/blocked_desc_creator.h" -#include +#include #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace ov::intel_cpu; @@ -21,7 +21,7 @@ TEST(MemDescTest, Conversion) { // dnnl::memory::desc -> DnnlBlockedMemoryDesc -> CpuBlockedMemoryDesc -> DnnlBlockedMemoryDesc -> dnnl::memory::desc auto converted_correctly = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { dnnl::memory::desc orig_tdesc {dims, dnnl::memory::data_type::u8, fmt}; - DnnlMemoryDescPtr plg_tdesc = MKLDNNExtensionUtils::makeDescriptor(orig_tdesc); + DnnlMemoryDescPtr plg_tdesc = DnnlExtensionUtils::makeDescriptor(orig_tdesc); BlockedMemoryDescPtr blk_tdesc = MemoryDescUtils::convertToBlockedMemoryDesc(plg_tdesc); MemoryDescPtr cpu_blk_tdesc = std::make_shared(blk_tdesc->getPrecision(), blk_tdesc->getShape(), blk_tdesc->getBlockDims(), 
blk_tdesc->getOrder(), blk_tdesc->getOffsetPadding(), blk_tdesc->getOffsetPaddingToData(), @@ -90,7 +90,7 @@ TEST(MemDescTest, UndefinedStateConversion) { TEST(MemDescTest, CompareWithTensorDescRecomputedStrides) { auto converted_correctly = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { dnnl::memory::desc orig_tdesc {dims, dnnl::memory::data_type::u8, fmt}; - DnnlMemoryDescPtr plg_tdesc = MKLDNNExtensionUtils::makeDescriptor(orig_tdesc); + DnnlMemoryDescPtr plg_tdesc = DnnlExtensionUtils::makeDescriptor(orig_tdesc); BlockedMemoryDescPtr blk_tdesc = MemoryDescUtils::convertToBlockedMemoryDesc(plg_tdesc); CpuBlockedMemoryDesc recomputed_blk_tdesc(blk_tdesc->getPrecision(), blk_tdesc->getShape(), blk_tdesc->getBlockDims(), blk_tdesc->getOrder()); @@ -118,9 +118,9 @@ TEST(MemDescTest, isPlainCheck) { dnnl::memory::desc permt_tdesc {dims, type, dnnl::memory::format_tag::acdb}; dnnl::memory::desc blckd_tdesc {dims, type, dnnl::memory::format_tag::aBcd8b}; - ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(plain_tdesc)->hasLayoutType(LayoutType::ncsp)); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(permt_tdesc)->hasLayoutType(LayoutType::ncsp)); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blckd_tdesc)->hasLayoutType(LayoutType::ncsp)); + ASSERT_TRUE(DnnlExtensionUtils::makeDescriptor(plain_tdesc)->hasLayoutType(LayoutType::ncsp)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(permt_tdesc)->hasLayoutType(LayoutType::ncsp)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(blckd_tdesc)->hasLayoutType(LayoutType::ncsp)); } TEST(MemDescTest, isBlockedCCheck) { @@ -131,19 +131,19 @@ TEST(MemDescTest, isBlockedCCheck) { dnnl::memory::desc tailc_tdesc {dims, type, dnnl::memory::format_tag::acdb}; dnnl::memory::desc blck8_tdesc {dims, type, dnnl::memory::format_tag::aBcd8b}; dnnl::memory::desc blck8_permCD_tdesc {dims, type, dnnl::memory::format_tag::aBdc16b}; - auto plain_mdesc = MKLDNNExtensionUtils::makeDescriptor(plain_tdesc); - auto tailc_mdesc = MKLDNNExtensionUtils::makeDescriptor(tailc_tdesc); + auto plain_mdesc = DnnlExtensionUtils::makeDescriptor(plain_tdesc); + auto tailc_mdesc = DnnlExtensionUtils::makeDescriptor(tailc_tdesc); ASSERT_FALSE(plain_mdesc->hasLayoutType(LayoutType::nCsp8c) || plain_mdesc->hasLayoutType(LayoutType::nCsp16c)); ASSERT_FALSE(tailc_mdesc->hasLayoutType(LayoutType::nCsp8c) || tailc_mdesc->hasLayoutType(LayoutType::nCsp16c)); - ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(blck8_tdesc)->hasLayoutType(LayoutType::nCsp8c)); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_permCD_tdesc)->hasLayoutType(LayoutType::nCsp16c)); + ASSERT_TRUE(DnnlExtensionUtils::makeDescriptor(blck8_tdesc)->hasLayoutType(LayoutType::nCsp8c)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(blck8_permCD_tdesc)->hasLayoutType(LayoutType::nCsp16c)); const auto crop_dims = dnnl::memory::dims {2, 1, 5, 7}; const auto crop_off = dnnl::memory::dims {1, 0, 0, 0}; dnnl::memory::desc blck8_crop_tdesc = blck8_tdesc.submemory_desc(crop_dims, crop_off); dnnl::memory::desc blck8_permCD_crop_tdesc = blck8_permCD_tdesc.submemory_desc(crop_dims, crop_off); - ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(blck8_crop_tdesc)->hasLayoutType(LayoutType::nCsp8c)); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_permCD_crop_tdesc)->hasLayoutType(LayoutType::nCsp8c)); + ASSERT_TRUE(DnnlExtensionUtils::makeDescriptor(blck8_crop_tdesc)->hasLayoutType(LayoutType::nCsp8c)); + 
ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(blck8_permCD_crop_tdesc)->hasLayoutType(LayoutType::nCsp8c)); } TEST(MemDescTest, isTailCCheck) { @@ -154,18 +154,18 @@ TEST(MemDescTest, isTailCCheck) { dnnl::memory::desc tailc_tdesc {dims, type, dnnl::memory::format_tag::acdb}; dnnl::memory::desc permt_tdesc {dims, type, dnnl::memory::format_tag::bcda}; dnnl::memory::desc blck8_tdesc {dims, type, dnnl::memory::format_tag::aBcd8b}; - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(plain_tdesc)->hasLayoutType(LayoutType::nspc)); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(permt_tdesc)->hasLayoutType(LayoutType::nspc)); - ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(tailc_tdesc)->hasLayoutType(LayoutType::nspc)); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(plain_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(permt_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_TRUE(DnnlExtensionUtils::makeDescriptor(tailc_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(blck8_tdesc)->hasLayoutType(LayoutType::nspc)); dnnl::memory::desc blck8_permCD_tdesc {dims, type, dnnl::memory::format_tag::aBdc16b}; - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_permCD_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(blck8_permCD_tdesc)->hasLayoutType(LayoutType::nspc)); const auto crop_dims = dnnl::memory::dims {2, 1, 5, 7}; const auto crop_off = dnnl::memory::dims {1, 0, 0, 0}; dnnl::memory::desc tailc_crop_tdesc = blck8_tdesc.submemory_desc(crop_dims, crop_off); - ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(tailc_crop_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(DnnlExtensionUtils::makeDescriptor(tailc_crop_tdesc)->hasLayoutType(LayoutType::nspc)); } TEST(MemDescTest, constructWithPlainFormat) { @@ -188,11 +188,11 @@ TEST(MemDescTest, ComaptibleWithFormat) { GTEST_SKIP(); } -TEST(MKLDNNMemDescTest, KeepOrder) { +TEST(MemDescTest, KeepOrder) { using mkldnn::memory; Shape dims(VectorDims{7, 3, 1, 5}); memory::data_type dataType = memory::data_type::u8; - DnnlBlockedMemoryDesc descPalanar(MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType), dims); + DnnlBlockedMemoryDesc descPalanar(DnnlExtensionUtils::DataTypeToIEPrecision(dataType), dims); ASSERT_THAT(descPalanar.getOrder(), ElementsAre(0, 1, 2, 3)); DnnlBlockedMemoryDesc descTailC(dims, dataType, memory::format_tag::acdb); @@ -204,19 +204,19 @@ TEST(MKLDNNMemDescTest, KeepOrder) { DnnlBlockedMemoryDesc descWeightBlocked(dims, dataType, memory::format_tag::ABcd16b16a2b); ASSERT_THAT(descWeightBlocked.getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1)); - auto dnnDims = MKLDNNExtensionUtils::convertToDnnlDims(dims.getStaticDims()); + auto dnnDims = DnnlExtensionUtils::convertToDnnlDims(dims.getStaticDims()); memory::desc mkldnnDescPlanar(dnnDims, dataType, memory::format_tag::abcd); - ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescPlanar)->as()->getOrder(), ElementsAre(0, 1, 2, 3)); + ASSERT_THAT(DnnlExtensionUtils::makeDescriptor(mkldnnDescPlanar)->as()->getOrder(), ElementsAre(0, 1, 2, 3)); memory::desc mkldnnDescTailC(dnnDims, dataType, memory::format_tag::acdb); - ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescTailC)->as()->getOrder(), ElementsAre(0, 2, 3, 1)); + ASSERT_THAT(DnnlExtensionUtils::makeDescriptor(mkldnnDescTailC)->as()->getOrder(), ElementsAre(0, 2, 3, 1)); 
memory::desc mkldnnDescBlockedC(dnnDims, dataType, memory::format_tag::aBcd16b); - ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescBlockedC)->as()->getOrder(), ElementsAre(0, 1, 2, 3, 1)); + ASSERT_THAT(DnnlExtensionUtils::makeDescriptor(mkldnnDescBlockedC)->as()->getOrder(), ElementsAre(0, 1, 2, 3, 1)); memory::desc mkldnnDescWeightBlocked(dnnDims, dataType, memory::format_tag::ABcd16b16a2b); - ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescWeightBlocked)->as()->getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1)); + ASSERT_THAT(DnnlExtensionUtils::makeDescriptor(mkldnnDescWeightBlocked)->as()->getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1)); } TEST(MemDescTest, UndefinedState) { @@ -289,7 +289,7 @@ TEST(MemDescTest, MemSize) { DnnlBlockedMemoryDesc memDescDefUpper(pluginShapeDefUpperBound, dnnlDataType, mkldnn::memory::format_tag::nhwc); ASSERT_EQ(memDescDefUpper.getCurrentMemSize(), undefSize); - ASSERT_EQ(memDescDefUpper.getMaxMemSize(), maxElementsCount * MKLDNNExtensionUtils::sizeOfDataType(dnnlDataType)); + ASSERT_EQ(memDescDefUpper.getMaxMemSize(), maxElementsCount * DnnlExtensionUtils::sizeOfDataType(dnnlDataType)); ngraph::PartialShape ngraphShapeDefined({{16}, {16}, {10}, {7}}); ov::intel_cpu::Shape pluginShapeDefined(ngraphShapeDefined); @@ -318,11 +318,11 @@ TEST(MakeUndefinedDnnlDesc, checkRank) { const memory::desc origin({10, 20, 15, 7}, dataType, memory::format_tag::nChw16c); ov::intel_cpu::Shape pluginShapeWrongRank(ngraph::PartialShape{{-1, -1}, {-1, -1}, {-1, -1}}); - ASSERT_THROW(MKLDNNExtensionUtils::makeUndefinedDesc(origin, pluginShapeWrongRank), InferenceEngine::ParameterMismatch); + ASSERT_THROW(DnnlExtensionUtils::makeUndefinedDesc(origin, pluginShapeWrongRank), InferenceEngine::ParameterMismatch); ov::intel_cpu::Shape pluginShapeRightRank(ngraph::PartialShape{{-1, -1}, {-1, -1}, {-1, -1}, {-1, -1}}); MemoryDescPtr memDesc; - ASSERT_NO_THROW(memDesc = MKLDNNExtensionUtils::makeUndefinedDesc(origin, pluginShapeRightRank)); + ASSERT_NO_THROW(memDesc = DnnlExtensionUtils::makeUndefinedDesc(origin, pluginShapeRightRank)); ASSERT_FALSE(memDesc->isDefined()); } @@ -335,13 +335,13 @@ TEST(MakeUndefinedDnnlDesc, checkDims) { for (size_t i = 0; i < fullyUndef.size(); ++i) { auto partialShape = fullyUndef; partialShape[i] = {3}; // just a number which is not equal to any origin dims - ASSERT_THROW(MKLDNNExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(partialShape)), InferenceEngine::ParameterMismatch); + ASSERT_THROW(DnnlExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(partialShape)), InferenceEngine::ParameterMismatch); } for (size_t i = 0; i < origin.dims().size(); ++i) { auto partialShape = fullyUndef; partialShape[i] = {origin.dims()[i]}; MemoryDescPtr memDesc; - ASSERT_NO_THROW(memDesc = MKLDNNExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(fullyUndef))); + ASSERT_NO_THROW(memDesc = DnnlExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(fullyUndef))); ASSERT_FALSE(memDesc->isDefined()); } } @@ -369,12 +369,12 @@ TEST(MakeUndefinedDnnlDesc, checkLayout) { std::tie(fmt, dims, strFormat) = item; const memory::desc origin(dims, dataType, fmt); - auto undefDesc = MKLDNNExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(fullyUndef)); + auto undefDesc = DnnlExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(fullyUndef)); ASSERT_FALSE(undefDesc->isDefined()); ov::intel_cpu::DnnlBlockedMemoryDesc referenceDesc(ov::intel_cpu::Shape(fullyUndef), dataType, fmt); 
ASSERT_TRUE(undefDesc->isCompatible(referenceDesc)); ASSERT_EQ(undefDesc->serializeFormat(), strFormat); - auto defDesc = undefDesc->cloneWithNewDims(MKLDNNExtensionUtils::convertToVectorDims(dims)); + auto defDesc = undefDesc->cloneWithNewDims(DnnlExtensionUtils::convertToVectorDims(dims)); ASSERT_TRUE(defDesc->isDefined()); ASSERT_EQ(origin, defDesc->as()->getDnnlDesc()); } @@ -406,11 +406,11 @@ TEST(MakeUndefinedDnnlDesc, extraData) { origin.data.extra.compensation_mask = 1; origin.data.extra.scale_adjust = 2.0f; - auto undefDesc = MKLDNNExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(fullyUndef)); + auto undefDesc = DnnlExtensionUtils::makeUndefinedDesc(origin, ov::intel_cpu::Shape(fullyUndef)); ASSERT_FALSE(undefDesc->isDefined()); - auto defDesc = undefDesc->cloneWithNewDims(MKLDNNExtensionUtils::convertToVectorDims(dims)); + auto defDesc = undefDesc->cloneWithNewDims(DnnlExtensionUtils::convertToVectorDims(dims)); ASSERT_TRUE(defDesc->isDefined()); - auto referenceDesc = MKLDNNExtensionUtils::makeDescriptor(origin); + auto referenceDesc = DnnlExtensionUtils::makeDescriptor(origin); ASSERT_TRUE(defDesc->isCompatible(*referenceDesc)); ASSERT_EQ(origin, defDesc->as()->getDnnlDesc()); } @@ -420,7 +420,7 @@ TEST(MakeUndefinedDnnlDesc, extraData) { TEST(isSameMethodTest, CheckTensorWithSameStrides) { auto isSameDataFormat = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { dnnl::memory::desc oneDnnDesc {dims, dnnl::memory::data_type::u8, fmt}; - auto pluginDesc = MKLDNNExtensionUtils::makeDescriptor(oneDnnDesc); + auto pluginDesc = DnnlExtensionUtils::makeDescriptor(oneDnnDesc); return pluginDesc->isSame(fmt); }; diff --git a/src/tests/unit/cpu/mkldnn_zero_dims_test.cpp b/src/tests/unit/cpu/mkldnn_zero_dims_test.cpp index fe26458eeb2..adace17402e 100644 --- a/src/tests/unit/cpu/mkldnn_zero_dims_test.cpp +++ b/src/tests/unit/cpu/mkldnn_zero_dims_test.cpp @@ -7,7 +7,7 @@ #include #include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/common/blocked_desc_creator.h" -#include +#include #include "memory_desc/dnnl_blocked_memory_desc.h" using namespace ov::intel_cpu; @@ -30,7 +30,7 @@ protected: auto replaceShape = origShape; std::replace(replaceShape.begin(), replaceShape.end(), ngraph::Dimension(0), ngraph::Dimension(3)); Shape dummyShape(replaceShape); - DnnlBlockedMemoryDesc dummyDesc(dummyShape, MKLDNNExtensionUtils::IEPrecisionToDataType(precision), fmt); + DnnlBlockedMemoryDesc dummyDesc(dummyShape, DnnlExtensionUtils::IEPrecisionToDataType(precision), fmt); expectedBlkDims = dummyDesc.getBlockDims(); expectedOrder = dummyDesc.getOrder(); for (size_t i = 0; i < dummyShape.getRank(); i++) { @@ -134,7 +134,7 @@ public: } std::pair createDescs() const override { - DnnlBlockedMemoryDesc descDnnl(shape, MKLDNNExtensionUtils::IEPrecisionToDataType(precision), fmt); + DnnlBlockedMemoryDesc descDnnl(shape, DnnlExtensionUtils::IEPrecisionToDataType(precision), fmt); CpuBlockedMemoryDesc descCpu(precision, shape, descDnnl.getBlockDims(), descDnnl.getOrder()); return {descDnnl, descCpu}; } @@ -213,7 +213,7 @@ protected: }; TEST_P(MemDescWithZeroDimsCloneNewDimsTest, CloneWithNewDims) { - DnnlBlockedMemoryDesc dynamicDescDnnl(shapeDynamic, MKLDNNExtensionUtils::IEPrecisionToDataType(precision), fmt); + DnnlBlockedMemoryDesc dynamicDescDnnl(shapeDynamic, DnnlExtensionUtils::IEPrecisionToDataType(precision), fmt); CpuBlockedMemoryDesc dynamicDescCpu(precision, shape, dynamicDescDnnl.getBlockDims(), dynamicDescDnnl.getOrder()); const size_t offset = 0, offsetPadding = 
0; VectorDims zeroStrides(dynamicDescDnnl.getBlockDims().size(), 0); diff --git a/src/tests/unit/cpu/nodes/reorder_node_test.cpp b/src/tests/unit/cpu/nodes/reorder_node_test.cpp index dba937777db..b2ac692a307 100644 --- a/src/tests/unit/cpu/nodes/reorder_node_test.cpp +++ b/src/tests/unit/cpu/nodes/reorder_node_test.cpp @@ -12,7 +12,7 @@ #include "cache/multi_cache.h" /* - * Test MKLDNNReorderNode::optimizedNcsp2Nspc() and MKLDNNReorderNode::optimizedNspc2Ncsp() for + * Test Reorder::optimizedNcsp2Nspc() and Reorder::optimizedNspc2Ncsp() for * inPlace and non-inPlace cases. Specifically, the test checks that dst batch strides are * correctly taken into account by the custom impls (the case when the reorder is followed by an inplace concat). */ @@ -55,22 +55,22 @@ protected: return result; }; const mkldnn::engine cpuEngine(dnnl::engine::kind::cpu, 0); - ov::intel_cpu::MKLDNNWeightsSharing::Ptr weightsCache; + ov::intel_cpu::WeightsSharing::Ptr weightsCache; - auto inputNode = std::make_shared(ov::intel_cpu::Shape(srcDims), - prec, - "Reorder_Input", "Input", - cpuEngine, weightsCache); - auto reorderNode = std::make_shared("Reorder", cpuEngine, weightsCache); - auto outputNode = std::make_shared(ov::intel_cpu::Shape(dstDims), - prec, - "Reorder_Output", "Output", - cpuEngine, weightsCache); + auto inputNode = std::make_shared(ov::intel_cpu::Shape(srcDims), + prec, + "Reorder_Input", "Input", + cpuEngine, weightsCache); + auto reorderNode = std::make_shared("Reorder", cpuEngine, weightsCache); + auto outputNode = std::make_shared(ov::intel_cpu::Shape(dstDims), + prec, + "Reorder_Output", "Output", + cpuEngine, weightsCache); - auto parentEdge = std::make_shared(inputNode, reorderNode, 0, 0); - auto childEdge = std::make_shared(reorderNode, outputNode, 0, 0); - parentEdge->changeStatus(ov::intel_cpu::MKLDNNEdge::Status::NeedAllocation); - childEdge->changeStatus(ov::intel_cpu::MKLDNNEdge::Status::NeedAllocation); + auto parentEdge = std::make_shared(inputNode, reorderNode, 0, 0); + auto childEdge = std::make_shared(reorderNode, outputNode, 0, 0); + parentEdge->changeStatus(ov::intel_cpu::Edge::Status::NeedAllocation); + childEdge->changeStatus(ov::intel_cpu::Edge::Status::NeedAllocation); reorderNode->addEdge(parentEdge); reorderNode->addEdge(childEdge); auto rtParamsCache = std::make_shared(100); @@ -90,8 +90,8 @@ protected: getBlockedDims(srcDims, dstOrder), dstOrder, 0, offsetPaddingToData, dstStrides); - auto parentMemory = std::make_shared(cpuEngine); - auto childMemory = std::make_shared(cpuEngine); + auto parentMemory = std::make_shared(cpuEngine); + auto childMemory = std::make_shared(cpuEngine); parentMemory->Create(inputDesc, srcData); childMemory->Create(outputDesc, dstData); parentEdge->reuse(parentMemory); @@ -99,7 +99,7 @@ protected: reorderNode->setDescs(inputDesc, outputDesc); reorderNode->setRuntimeCache(rtParamsCache); - std::vector> nodes {inputNode, reorderNode, outputNode}; + std::vector> nodes {inputNode, reorderNode, outputNode}; for (auto &n : nodes) { n->init(); n->getSupportedDescriptors(); diff --git a/src/tests/unit/cpu/shape_inference_test/assign_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/assign_shape_inference.cpp index f841d301926..43cf2dc5699 100644 --- a/src/tests/unit/cpu/shape_inference_test/assign_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/assign_shape_inference.cpp @@ -10,6 +10,8 @@ #include using namespace ov; +using namespace ov::intel_cpu; + template std::shared_ptr constructGraph(); diff --git 
a/src/tests/unit/cpu/shape_inference_test/batch_to_space_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/batch_to_space_shape_inference.cpp index e4878c5fe5a..2a748a4e50c 100644 --- a/src/tests/unit/cpu/shape_inference_test/batch_to_space_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/batch_to_space_shape_inference.cpp @@ -12,6 +12,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; static std::shared_ptr make_batch_to_space( PartialShape data_shape = PartialShape::dynamic(ov::Rank(2)), diff --git a/src/tests/unit/cpu/shape_inference_test/broadcast_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/broadcast_shape_inference.cpp index 63af1b3cc3d..ec77341e28b 100644 --- a/src/tests/unit/cpu/shape_inference_test/broadcast_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/broadcast_shape_inference.cpp @@ -12,6 +12,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, BroadcastBidirectionalTest) { auto input = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/bucketize_test.cpp b/src/tests/unit/cpu/shape_inference_test/bucketize_test.cpp index 9730a18a666..20745daf71a 100644 --- a/src/tests/unit/cpu/shape_inference_test/bucketize_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/bucketize_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; using namespace std; TEST(StaticShapeInferenceTest, BucketizeV3) { @@ -14,5 +15,5 @@ TEST(StaticShapeInferenceTest, BucketizeV3) { auto buckets = make_shared(element::f32, ov::PartialShape{-1}); auto bucketize = make_shared(data, buckets); - check_static_shape(bucketize.get(), {ov::StaticShape{2, 3, 2}, ov::StaticShape{4}}, {ov::StaticShape{2, 3, 2}}); + check_static_shape(bucketize.get(), {StaticShape{2, 3, 2}, StaticShape{4}}, {StaticShape{2, 3, 2}}); } diff --git a/src/tests/unit/cpu/shape_inference_test/convolution_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/convolution_shape_inference.cpp index 90ed705ac06..579c99331c8 100644 --- a/src/tests/unit/cpu/shape_inference_test/convolution_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/convolution_shape_inference.cpp @@ -14,6 +14,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ConvolutionTest) { Strides strides{1, 1}; diff --git a/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference.cpp index ca7f5f56571..c50ec667d26 100644 --- a/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_seq_len_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, CtcGreedyDecoderSeqLenTest) { auto P = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_shape_inference.cpp index 21167145d56..97958799684 100644 --- a/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/ctc_greedy_decoder_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; 
TEST(StaticShapeInferenceTest, CtcGreedyDecoderTest) { auto P = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/ctc_loss_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/ctc_loss_shape_inference.cpp index da81410e3e3..47af7ad0ffc 100644 --- a/src/tests/unit/cpu/shape_inference_test/ctc_loss_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/ctc_loss_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, CTCLossTest) { const auto& logits = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/depth_to_space_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/depth_to_space_shape_inference.cpp index 117530af035..128ca8eabce 100644 --- a/src/tests/unit/cpu/shape_inference_test/depth_to_space_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/depth_to_space_shape_inference.cpp @@ -9,12 +9,14 @@ #include #include +using namespace ov::intel_cpu; + TEST(StaticShapeInferenceTest, DepthToSpaceTest) { auto A = std::make_shared(ov::element::f32, ov::PartialShape::dynamic(ov::Rank(4))); auto depth_to_space = std::make_shared(A, ov::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST, 2); - const std::vector input_shapes = {ov::StaticShape{1, 16, 3, 1080, 1616}}; - std::vector output_shapes = {ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{1, 16, 3, 1080, 1616}}; + std::vector output_shapes = {StaticShape{}}; shape_inference(depth_to_space.get(), input_shapes, output_shapes); - ASSERT_EQ(output_shapes[0], (ov::StaticShape{1, 2, 2 * 3, 2 * 1080, 2 * 1616})); + ASSERT_EQ(output_shapes[0], (StaticShape{1, 2, 2 * 3, 2 * 1080, 2 * 1616})); } diff --git a/src/tests/unit/cpu/shape_inference_test/detection_output_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/detection_output_shape_inference.cpp index b30ddfdfedf..39239e34087 100644 --- a/src/tests/unit/cpu/shape_inference_test/detection_output_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/detection_output_shape_inference.cpp @@ -11,6 +11,7 @@ #include "utils/shape_inference/static_shape.hpp" using namespace ov; +using namespace ov::intel_cpu; template std::shared_ptr create_detection_output(const PartialShape& box_logits_shape, diff --git a/src/tests/unit/cpu/shape_inference_test/einsum_test.cpp b/src/tests/unit/cpu/shape_inference_test/einsum_test.cpp index 69e87940401..515180f3c37 100644 --- a/src/tests/unit/cpu/shape_inference_test/einsum_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/einsum_test.cpp @@ -7,13 +7,14 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, Einsum1) { auto I1 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto I2 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto O = std::make_shared(OutputVector{I1, I2}, "i,i->"); - check_static_shape(O.get(), {ov::StaticShape{3}, ov::StaticShape{3}}, {ov::StaticShape{}}); + check_static_shape(O.get(), {StaticShape{3}, StaticShape{3}}, {StaticShape{}}); } TEST(StaticShapeInferenceTest, Einsum2) { @@ -21,21 +22,21 @@ TEST(StaticShapeInferenceTest, Einsum2) { auto I2 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto O = std::make_shared(OutputVector{I1, I2}, "ab,bc->ac"); - check_static_shape(O.get(), {ov::StaticShape{2, 3}, ov::StaticShape{3, 4}}, {ov::StaticShape{2, 4}}); + 
check_static_shape(O.get(), {StaticShape{2, 3}, StaticShape{3, 4}}, {StaticShape{2, 4}}); } TEST(StaticShapeInferenceTest, Einsum3) { auto I1 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto O = std::make_shared(OutputVector{I1}, "kii->k"); - check_static_shape(O.get(), {ov::StaticShape{2, 3, 3}}, {ov::StaticShape{2}}); + check_static_shape(O.get(), {StaticShape{2, 3, 3}}, {StaticShape{2}}); } TEST(StaticShapeInferenceTest, Einsum4) { auto I1 = std::make_shared(element::f32, ov::PartialShape::dynamic()); auto O = std::make_shared(OutputVector{I1}, "ijk->kij"); - check_static_shape(O.get(), {ov::StaticShape{1, 2, 3}}, {ov::StaticShape{3, 1, 2}}); + check_static_shape(O.get(), {StaticShape{1, 2, 3}}, {StaticShape{3, 1, 2}}); } TEST(StaticShapeInferenceTest, Einsum5) { @@ -45,6 +46,6 @@ TEST(StaticShapeInferenceTest, Einsum5) { auto O = std::make_shared(OutputVector{I1, I2, I3}, "ab,bcd,bc->ca"); check_static_shape(O.get(), - {ov::StaticShape{2, 5}, ov::StaticShape{5, 3, 6}, ov::StaticShape{5, 3}}, - {ov::StaticShape{3, 2}}); + {StaticShape{2, 5}, StaticShape{5, 3, 6}, StaticShape{5, 3}}, + {StaticShape{3, 2}}); } diff --git a/src/tests/unit/cpu/shape_inference_test/elementwises.cpp b/src/tests/unit/cpu/shape_inference_test/elementwises.cpp index 10ea18029fd..b4161049a95 100644 --- a/src/tests/unit/cpu/shape_inference_test/elementwises.cpp +++ b/src/tests/unit/cpu/shape_inference_test/elementwises.cpp @@ -13,6 +13,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, UnaryEltwiseTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/embedding_segments_sum_test.cpp b/src/tests/unit/cpu/shape_inference_test/embedding_segments_sum_test.cpp index c08330de57d..60d46fb6069 100644 --- a/src/tests/unit/cpu/shape_inference_test/embedding_segments_sum_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/embedding_segments_sum_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; using namespace std; TEST(StaticShapeInferenceTest, EmbeddingSegmentsSum) { diff --git a/src/tests/unit/cpu/shape_inference_test/embeddingbag_offsets_sum_test.cpp b/src/tests/unit/cpu/shape_inference_test/embeddingbag_offsets_sum_test.cpp index 0a252e45a42..ff0beec88fd 100644 --- a/src/tests/unit/cpu/shape_inference_test/embeddingbag_offsets_sum_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/embeddingbag_offsets_sum_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; using namespace std; TEST(StaticShapeInferenceTest, EmbeddingBagOffsetsSumV3) { @@ -21,6 +22,6 @@ TEST(StaticShapeInferenceTest, EmbeddingBagOffsetsSumV3) { check_static_shape( ebos.get(), - {ov::StaticShape{5, 2}, ov::StaticShape{4}, ov::StaticShape{3}, ov::StaticShape{}, ov::StaticShape{4}}, - {ov::StaticShape{3, 2}}); + {StaticShape{5, 2}, StaticShape{4}, StaticShape{3}, StaticShape{}, StaticShape{4}}, + {StaticShape{3, 2}}); } diff --git a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_detection_output_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_detection_output_shape_inference.cpp index c535c3a9246..cf759f9189b 100644 --- a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_detection_output_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_detection_output_shape_inference.cpp @@ -13,6 +13,7 @@ #include 
"utils/shape_inference/static_shape.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ExperimentalDetectronDetectionOutputTest) { using Attrs = op::v6::ExperimentalDetectronDetectionOutput::Attributes; diff --git a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_generate_proposal.cpp b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_generate_proposal.cpp index f44f7e000cc..0c6a51b120a 100644 --- a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_generate_proposal.cpp +++ b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_generate_proposal.cpp @@ -10,6 +10,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; using ExperimentalProposals = op::v6::ExperimentalDetectronGenerateProposalsSingleImage; TEST(StaticShapeInferenceTest, ExperimentalProposalsTest) { @@ -27,11 +28,11 @@ TEST(StaticShapeInferenceTest, ExperimentalProposalsTest) { auto proposals = std::make_shared(im_info, anchors, deltas, scores, attrs); - const std::vector input_shapes = {ov::StaticShape{3}, - ov::StaticShape{201600, 4}, - ov::StaticShape{12, 200, 336}, - ov::StaticShape{3, 200, 336}}; - std::vector output_shapes = {ov::StaticShape{}, ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{3}, + StaticShape{201600, 4}, + StaticShape{12, 200, 336}, + StaticShape{3, 200, 336}}; + std::vector output_shapes = {StaticShape{}, StaticShape{}}; shape_inference(proposals.get(), input_shapes, output_shapes); ASSERT_EQ(output_shapes[0], (StaticShape{post_nms_count, 4})); diff --git a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference.cpp index deef2ecef54..1cf7c53ca95 100644 --- a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_prior_grid_generator_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, PriorGridGenerator) { op::v6::ExperimentalDetectronPriorGridGenerator::Attributes attrs; diff --git a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_roi_feature_extractor.cpp b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_roi_feature_extractor.cpp index 376bb2d23e5..a40110e4283 100644 --- a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_roi_feature_extractor.cpp +++ b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_roi_feature_extractor.cpp @@ -13,6 +13,7 @@ #include "utils/shape_inference/static_shape.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ExperimentalDetectronROIFeatureExtractor) { op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes attrs; diff --git a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_topkrois_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_topkrois_shape_inference.cpp index 3d803b93dff..611689fd2bb 100644 --- a/src/tests/unit/cpu/shape_inference_test/experimental_detectron_topkrois_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/experimental_detectron_topkrois_shape_inference.cpp @@ -12,6 +12,7 @@ #include "utils/shape_inference/static_shape.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, 
ExperimentalDetectronTopKROIsTest) { auto input_rois = std::make_shared(element::f32, PartialShape{-1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/extract_image_patches_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/extract_image_patches_shape_inference.cpp index 68cda5ae184..db013ce5256 100644 --- a/src/tests/unit/cpu/shape_inference_test/extract_image_patches_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/extract_image_patches_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ExtractImagePatchesTest) { auto data = std::make_shared(element::i32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/fft_base_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/fft_base_shape_inference.cpp index e13798d72dc..f973e042560 100644 --- a/src/tests/unit/cpu/shape_inference_test/fft_base_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/fft_base_shape_inference.cpp @@ -13,6 +13,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; static std::shared_ptr build_dft() { auto input_shape = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp index 9e8e336a302..4e20d131ff7 100644 --- a/src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp @@ -10,6 +10,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, GatherElementsTest) { int64_t axis = -1; diff --git a/src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp index e43cc6b2590..cfa13447457 100644 --- a/src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp @@ -12,6 +12,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, GatherV1Test) { auto P = std::make_shared(element::f32, PartialShape{-1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp index 10e2ee293e0..5fa1f571f22 100644 --- a/src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp @@ -12,6 +12,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, GatherTreeTest) { auto step_ids = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/interpolate_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/interpolate_shape_inference.cpp index 2b45e820134..b2d8f02f156 100644 --- a/src/tests/unit/cpu/shape_inference_test/interpolate_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/interpolate_shape_inference.cpp @@ -12,6 +12,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; using InterpolateMode = op::v4::Interpolate::InterpolateMode; using CoordinateTransformMode = op::v4::Interpolate::CoordinateTransformMode; diff --git a/src/tests/unit/cpu/shape_inference_test/lstm_cell_shape_inference.cpp 
b/src/tests/unit/cpu/shape_inference_test/lstm_cell_shape_inference.cpp index 229e98c2401..4efe3b1af8e 100644 --- a/src/tests/unit/cpu/shape_inference_test/lstm_cell_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/lstm_cell_shape_inference.cpp @@ -10,6 +10,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, LstmCellTest) { const size_t batch_size = 2; diff --git a/src/tests/unit/cpu/shape_inference_test/make_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/make_shape_inference.cpp index fa4ce061306..c91b92527ad 100644 --- a/src/tests/unit/cpu/shape_inference_test/make_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/make_shape_inference.cpp @@ -14,6 +14,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, MakeShapeInference) { auto inp1_f32 = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/matmul_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/matmul_shape_inference.cpp index 73015f36ff5..f159471b76e 100644 --- a/src/tests/unit/cpu/shape_inference_test/matmul_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/matmul_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, MatMulTest) { auto A_input = std::make_shared(element::i64, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp index adcdc3e559a..84b8c93cec1 100644 --- a/src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, OneHotTest) { auto indices = std::make_shared(element::i64, PartialShape{-1}); diff --git a/src/tests/unit/cpu/shape_inference_test/pad_test.cpp b/src/tests/unit/cpu/shape_inference_test/pad_test.cpp index 5f9f91ef5ce..bb1f42f6c89 100644 --- a/src/tests/unit/cpu/shape_inference_test/pad_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/pad_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, Padv1) { const auto data = std::make_shared(element::f32, PartialShape::dynamic()); @@ -18,9 +19,9 @@ TEST(StaticShapeInferenceTest, Padv1) { const auto pad = std::make_shared(data, pads_begin, pads_end, pad_val, op::PadMode::CONSTANT); check_static_shape(pad.get(), - {ov::StaticShape{3, 6, 5, 5}, - ov::StaticShape{4}, - ov::StaticShape{4}, - ov::StaticShape()}, - {ov::StaticShape({6, 9, 8, 8})}); + {StaticShape{3, 6, 5, 5}, + StaticShape{4}, + StaticShape{4}, + StaticShape()}, + {StaticShape({6, 9, 8, 8})}); } diff --git a/src/tests/unit/cpu/shape_inference_test/proposal.cpp b/src/tests/unit/cpu/shape_inference_test/proposal.cpp index e91f1a1f3d6..9f9173f33d6 100644 --- a/src/tests/unit/cpu/shape_inference_test/proposal.cpp +++ b/src/tests/unit/cpu/shape_inference_test/proposal.cpp @@ -10,6 +10,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ProposalV0Test) { op::v0::Proposal::Attributes attrs; @@ -22,10 +23,10 @@ TEST(StaticShapeInferenceTest, ProposalV0Test) { auto class_bbox_deltas = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); auto image_shape = 
std::make_shared(element::f32, PartialShape{-1}); auto op = std::make_shared(class_probs, class_bbox_deltas, image_shape, attrs); - const std::vector input_shapes = {ov::StaticShape{batch_size, 12, 34, 62}, - ov::StaticShape{batch_size, 24, 34, 62}, - ov::StaticShape{3}}; - std::vector output_shapes = {ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{batch_size, 12, 34, 62}, + StaticShape{batch_size, 24, 34, 62}, + StaticShape{3}}; + std::vector output_shapes = {StaticShape{}}; shape_inference(op.get(), input_shapes, output_shapes); ASSERT_EQ(output_shapes[0], (StaticShape{batch_size * attrs.post_nms_topn, 5})); @@ -42,10 +43,10 @@ TEST(StaticShapeInferenceTest, ProposalV4Test) { auto class_bbox_deltas = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); auto image_shape = std::make_shared(element::f32, PartialShape{-1}); auto op = std::make_shared(class_probs, class_bbox_deltas, image_shape, attrs); - const std::vector input_shapes = {ov::StaticShape{batch_size, 12, 34, 62}, - ov::StaticShape{batch_size, 24, 34, 62}, - ov::StaticShape{3}}; - std::vector output_shapes = {ov::StaticShape{}, ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{batch_size, 12, 34, 62}, + StaticShape{batch_size, 24, 34, 62}, + StaticShape{3}}; + std::vector output_shapes = {StaticShape{}, StaticShape{}}; shape_inference(op.get(), input_shapes, output_shapes); ASSERT_EQ(output_shapes[0], (StaticShape{batch_size * attrs.post_nms_topn, 5})); diff --git a/src/tests/unit/cpu/shape_inference_test/range_test.cpp b/src/tests/unit/cpu/shape_inference_test/range_test.cpp index 3e07fb4a351..ea895627f81 100644 --- a/src/tests/unit/cpu/shape_inference_test/range_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/range_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; using namespace std; TEST(StaticShapeInferenceTest, Rangev4_i32) { @@ -16,11 +17,11 @@ TEST(StaticShapeInferenceTest, Rangev4_i32) { auto range = make_shared(start, stop, step, element::i32); - check_static_shape(range.get(), {2, 0, -2}, {ov::StaticShape{1}}); - check_static_shape(range.get(), {2, 0, -1}, {ov::StaticShape{2}}); - check_static_shape(range.get(), {-19, 19, 1}, {ov::StaticShape{38}}); - check_static_shape(range.get(), {-19, 19, 3}, {ov::StaticShape{13}}); - check_static_shape(range.get(), {20, -19, 1}, {ov::StaticShape{0}}); + check_static_shape(range.get(), {2, 0, -2}, {StaticShape{1}}); + check_static_shape(range.get(), {2, 0, -1}, {StaticShape{2}}); + check_static_shape(range.get(), {-19, 19, 1}, {StaticShape{38}}); + check_static_shape(range.get(), {-19, 19, 3}, {StaticShape{13}}); + check_static_shape(range.get(), {20, -19, 1}, {StaticShape{0}}); } TEST(StaticShapeInferenceTest, Rangev4_f32) { @@ -30,7 +31,7 @@ TEST(StaticShapeInferenceTest, Rangev4_f32) { auto range = make_shared(start, stop, step, element::f32); - check_static_shape(range.get(), {0., 1., 0.25}, {ov::StaticShape{4}}); - check_static_shape(range.get(), {-1., 1., 0.25}, {ov::StaticShape{8}}); - check_static_shape(range.get(), {-1., 0.875, 0.25}, {ov::StaticShape{8}}); + check_static_shape(range.get(), {0., 1., 0.25}, {StaticShape{4}}); + check_static_shape(range.get(), {-1., 1., 0.25}, {StaticShape{8}}); + check_static_shape(range.get(), {-1., 0.875, 0.25}, {StaticShape{8}}); } diff --git a/src/tests/unit/cpu/shape_inference_test/read_value_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/read_value_shape_inference.cpp index bd164257126..d325ef75ba3 100644 --- 
a/src/tests/unit/cpu/shape_inference_test/read_value_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/read_value_shape_inference.cpp @@ -9,6 +9,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; template std::shared_ptr constructGraph(); diff --git a/src/tests/unit/cpu/shape_inference_test/reduce_test.cpp b/src/tests/unit/cpu/shape_inference_test/reduce_test.cpp index 049f141b7b4..5e39627fbd2 100644 --- a/src/tests/unit/cpu/shape_inference_test/reduce_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/reduce_test.cpp @@ -12,7 +12,7 @@ #include using namespace ov; - +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ReduceTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/region_yolo_test.cpp b/src/tests/unit/cpu/shape_inference_test/region_yolo_test.cpp index 4c3e2b0d652..4fde16f3031 100644 --- a/src/tests/unit/cpu/shape_inference_test/region_yolo_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/region_yolo_test.cpp @@ -7,13 +7,14 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; using namespace std; TEST(StaticShapeInferenceTest, RegionYoloV0) { auto inputs = make_shared(element::f32, ov::PartialShape{-1, -1, -1, -1}); auto op = make_shared(inputs, 0, 0, 0, true, std::vector{}, 0, 1); - check_static_shape(op.get(), {ov::StaticShape{1, 125, 13, 13}}, {ov::StaticShape{1 * 125, 13, 13}}); + check_static_shape(op.get(), {StaticShape{1, 125, 13, 13}}, {StaticShape{1 * 125, 13, 13}}); } TEST(StaticShapeInferenceTest, RegionYoloV0Dynamic) { @@ -23,5 +24,5 @@ TEST(StaticShapeInferenceTest, RegionYoloV0Dynamic) { EXPECT_EQ(op->get_output_partial_shape(0), ov::PartialShape({{1, 11}, ov::Dimension::dynamic()})); - check_static_shape(op.get(), {ov::StaticShape{10, 125, 13, 13}}, {ov::StaticShape{10, 125 * 13 * 13}}); + check_static_shape(op.get(), {StaticShape{10, 125, 13, 13}}, {StaticShape{10, 125 * 13 * 13}}); } \ No newline at end of file diff --git a/src/tests/unit/cpu/shape_inference_test/reorg_yolo_test.cpp b/src/tests/unit/cpu/shape_inference_test/reorg_yolo_test.cpp index 4109246cec8..0da3ab58c7f 100644 --- a/src/tests/unit/cpu/shape_inference_test/reorg_yolo_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/reorg_yolo_test.cpp @@ -8,6 +8,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; using namespace std; TEST(StaticShapeInferenceTest, ReorgYoloV0) { @@ -15,5 +16,5 @@ TEST(StaticShapeInferenceTest, ReorgYoloV0) { auto data_param = make_shared(element::f32, ov::PartialShape{-1, -1, -1, -1}); auto op = make_shared(data_param, stride); - check_static_shape(op.get(), {ov::StaticShape{1, 64, 26, 26}}, {ov::StaticShape{1, 256, 13, 13}}); + check_static_shape(op.get(), {StaticShape{1, 64, 26, 26}}, {StaticShape{1, 256, 13, 13}}); } \ No newline at end of file diff --git a/src/tests/unit/cpu/shape_inference_test/reverse_sequence_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/reverse_sequence_shape_inference.cpp index dd84c71e76a..60da39b76d2 100644 --- a/src/tests/unit/cpu/shape_inference_test/reverse_sequence_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/reverse_sequence_shape_inference.cpp @@ -10,6 +10,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ReverseSequenceTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git 
a/src/tests/unit/cpu/shape_inference_test/roi_align_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/roi_align_shape_inference.cpp index 1e32fee495c..56a17f858fe 100644 --- a/src/tests/unit/cpu/shape_inference_test/roi_align_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/roi_align_shape_inference.cpp @@ -10,16 +10,17 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ROIAlignTest) { const auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); const auto rois = std::make_shared(element::f32, PartialShape{-1, -1}); const auto batch_indices = std::make_shared(element::i32, PartialShape{-1}); const auto op = std::make_shared(data, rois, batch_indices, 2, 2, 1, 1.0f, "avg"); - const std::vector input_shapes = {ov::StaticShape{2, 3, 5, 5}, - ov::StaticShape{7, 4}, - ov::StaticShape{7}}; - std::vector output_shapes = {ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{2, 3, 5, 5}, + StaticShape{7, 4}, + StaticShape{7}}; + std::vector output_shapes = {StaticShape{}}; shape_inference(op.get(), input_shapes, output_shapes); ASSERT_EQ(output_shapes[0], (StaticShape{7, 3, 2, 2})); diff --git a/src/tests/unit/cpu/shape_inference_test/roll_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/roll_shape_inference.cpp index e98e91973a2..99aaf8482a5 100644 --- a/src/tests/unit/cpu/shape_inference_test/roll_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/roll_shape_inference.cpp @@ -10,6 +10,8 @@ #include #include +using namespace ov::intel_cpu; + TEST(StaticShapeInferenceTest, RollTest) { auto arg = std::make_shared(ov::element::f32, @@ -25,10 +27,10 @@ TEST(StaticShapeInferenceTest, RollTest) { std::map> constant_data; constant_data[2] = axes_tensor; - const std::vector input_shapes = {ov::StaticShape{3, 3, 3}, - ov::StaticShape{3}, - ov::StaticShape{3}}; - std::vector output_shapes = {ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{3, 3, 3}, + StaticShape{3}, + StaticShape{3}}; + std::vector output_shapes = {StaticShape{}}; shape_inference(roll.get(), input_shapes, output_shapes, constant_data); ASSERT_EQ(output_shapes[0], input_shapes[0]); } @@ -41,10 +43,10 @@ TEST(StaticShapeInferenceTest, RollTestWithConstAxis) { auto axes = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{0, 1, -1}); auto roll = std::make_shared(arg, shift, axes); - const std::vector input_shapes = {ov::StaticShape{3, 3, 3}, - ov::StaticShape{3}, - ov::StaticShape{3}}; - std::vector output_shapes = {ov::StaticShape{}}; + const std::vector input_shapes = {StaticShape{3, 3, 3}, + StaticShape{3}, + StaticShape{3}}; + std::vector output_shapes = {StaticShape{}}; shape_inference(roll.get(), input_shapes, output_shapes); ASSERT_EQ(output_shapes[0], input_shapes[0]); } diff --git a/src/tests/unit/cpu/shape_inference_test/scatter_elements_update_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/scatter_elements_update_shape_inference.cpp index 0db1768a668..b798c772056 100644 --- a/src/tests/unit/cpu/shape_inference_test/scatter_elements_update_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/scatter_elements_update_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ScatterElementsUpdateTest) { auto data_shape = std::make_shared(element::i32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/scatter_nd_shape_inference.cpp 
b/src/tests/unit/cpu/shape_inference_test/scatter_nd_shape_inference.cpp index 7deaa4ebf65..b1457c6d240 100644 --- a/src/tests/unit/cpu/shape_inference_test/scatter_nd_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/scatter_nd_shape_inference.cpp @@ -11,6 +11,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ScatterNDUpdateTest) { auto data_shape = std::make_shared(element::i32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/select_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/select_shape_inference.cpp index d5f791bf579..030f43965e1 100644 --- a/src/tests/unit/cpu/shape_inference_test/select_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/select_shape_inference.cpp @@ -11,6 +11,7 @@ #include "utils/shape_inference/static_shape.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, SelectTestBCastModeNUMPY) { auto cond = std::make_shared(element::boolean, PartialShape{}); diff --git a/src/tests/unit/cpu/shape_inference_test/shape_node_tests.cpp b/src/tests/unit/cpu/shape_inference_test/shape_node_tests.cpp index e5c3cd08256..6c5cbe3afef 100644 --- a/src/tests/unit/cpu/shape_inference_test/shape_node_tests.cpp +++ b/src/tests/unit/cpu/shape_inference_test/shape_node_tests.cpp @@ -15,7 +15,7 @@ #include using namespace ov; - +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ReshapeTest) { auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/shuffle_channels_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/shuffle_channels_shape_inference.cpp index 8862d8d7be3..c12dfcd2ab0 100644 --- a/src/tests/unit/cpu/shape_inference_test/shuffle_channels_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/shuffle_channels_shape_inference.cpp @@ -11,6 +11,7 @@ #include "utils/shape_inference/static_shape.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, ShuffleChannelsTest) { const auto data = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/space_to_batch_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/space_to_batch_shape_inference.cpp index f9855334d97..7d5fc08b7d3 100644 --- a/src/tests/unit/cpu/shape_inference_test/space_to_batch_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/space_to_batch_shape_inference.cpp @@ -12,6 +12,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; static std::shared_ptr build_space_to_batch( PartialShape data_shape = PartialShape::dynamic(ov::Rank(2)), diff --git a/src/tests/unit/cpu/shape_inference_test/space_to_depth_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/space_to_depth_shape_inference.cpp index 77eaa8f352a..9e178f4565c 100644 --- a/src/tests/unit/cpu/shape_inference_test/space_to_depth_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/space_to_depth_shape_inference.cpp @@ -9,12 +9,14 @@ #include #include +using namespace ov::intel_cpu; + TEST(StaticShapeInferenceTest, SpaceToDepthTest) { auto A = std::make_shared(ov::element::f32, ov::PartialShape::dynamic(ov::Rank(4))); auto space_to_depth = std::make_shared(A, ov::op::v0::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST, 2); - const std::vector input_shapes = {ov::StaticShape{1, 12, 4, 1080, 1616}}; - std::vector output_shapes = {ov::StaticShape{}}; + const 
std::vector input_shapes = {StaticShape{1, 12, 4, 1080, 1616}}; + std::vector output_shapes = {StaticShape{}}; shape_inference(space_to_depth.get(), input_shapes, output_shapes); - ASSERT_EQ(output_shapes[0], (ov::StaticShape{1, 12 * 8, 4 / 2, 1080 / 2, 1616 / 2})); + ASSERT_EQ(output_shapes[0], (StaticShape{1, 12 * 8, 4 / 2, 1080 / 2, 1616 / 2})); } diff --git a/src/tests/unit/cpu/shape_inference_test/split_tests.cpp b/src/tests/unit/cpu/shape_inference_test/split_tests.cpp index a408c51dd2d..8a5cf3794c3 100644 --- a/src/tests/unit/cpu/shape_inference_test/split_tests.cpp +++ b/src/tests/unit/cpu/shape_inference_test/split_tests.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; static std::shared_ptr build_split(PartialShape data_shape, std::initializer_list axis_value, diff --git a/src/tests/unit/cpu/shape_inference_test/strided_slice_test.cpp b/src/tests/unit/cpu/shape_inference_test/strided_slice_test.cpp index a17ee78b247..c7f3fa12d69 100644 --- a/src/tests/unit/cpu/shape_inference_test/strided_slice_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/strided_slice_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, StridedSlice1) { auto data = std::make_shared(ngraph::element::f32, ov::PartialShape::dynamic()); @@ -20,8 +21,8 @@ TEST(StaticShapeInferenceTest, StridedSlice1) { auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); check_static_shape(ss.get(), - {ov::StaticShape{3, 4, 5}, ov::StaticShape{3}, ov::StaticShape{3}, ov::StaticShape{3}}, - {ov::StaticShape{3, 4, 5}}); + {StaticShape{3, 4, 5}, StaticShape{3}, StaticShape{3}, StaticShape{3}}, + {StaticShape{3, 4, 5}}); } TEST(StaticShapeInferenceTest, StridedSlice2) { @@ -36,16 +37,16 @@ TEST(StaticShapeInferenceTest, StridedSlice2) { auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); check_static_shape(ss.get(), - {ov::StaticShape{3, 2, 3}, {1, 0, 0}, {2, 1, 3}, {1, 1, 1}}, - {ov::StaticShape{1, 1, 3}}); + {StaticShape{3, 2, 3}, {1, 0, 0}, {2, 1, 3}, {1, 1, 1}}, + {StaticShape{1, 1, 3}}); check_static_shape(ss.get(), - {ov::StaticShape{3, 2, 3}, {1, 0, 0}, {2, 2, 3}, {1, 1, 1}}, - {ov::StaticShape{1, 2, 3}}); + {StaticShape{3, 2, 3}, {1, 0, 0}, {2, 2, 3}, {1, 1, 1}}, + {StaticShape{1, 2, 3}}); check_static_shape(ss.get(), - {ov::StaticShape{3, 2, 3}, {2, 0, 0}, {3, 2, 3}, {1, 1, 2}}, - {ov::StaticShape{1, 2, 2}}); + {StaticShape{3, 2, 3}, {2, 0, 0}, {3, 2, 3}, {1, 1, 2}}, + {StaticShape{1, 2, 2}}); } TEST(StaticShapeInferenceTest, StridedSlice3) { @@ -60,8 +61,8 @@ TEST(StaticShapeInferenceTest, StridedSlice3) { auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); check_static_shape(ss.get(), - {ov::StaticShape{3, 2, 3}, {1, 0, 0}, {0, 0, 0}, {1, 1, 1}}, - {ov::StaticShape{2, 2, 3}}); + {StaticShape{3, 2, 3}, {1, 0, 0}, {0, 0, 0}, {1, 1, 1}}, + {StaticShape{2, 2, 3}}); } TEST(StaticShapeInferenceTest, StridedSlice4) { @@ -76,8 +77,8 @@ TEST(StaticShapeInferenceTest, StridedSlice4) { auto ss = std::make_shared(data, begin, end, stride, begin_mask, end_mask); check_static_shape(ss.get(), - {ov::StaticShape{3, 2, 3}, {0, 1, 0}, {2, 0, 0}, {1, 1, 2}}, - {ov::StaticShape{2, 1, 2}}); + {StaticShape{3, 2, 3}, {0, 1, 0}, {2, 0, 0}, {1, 1, 2}}, + {StaticShape{2, 1, 2}}); } TEST(StaticShapeInferenceTest, StridedSlice5) { @@ -92,6 +93,6 @@ TEST(StaticShapeInferenceTest, StridedSlice5) { auto ss = std::make_shared(data, begin, end, stride, 
begin_mask, end_mask); check_static_shape(ss.get(), - {ov::StaticShape{3, 2, 3}, {0, 0, 0}, {1, 0, 0}, {1, 1, -1}}, - {ov::StaticShape{1, 2, 3}}); + {StaticShape{3, 2, 3}, {0, 0, 0}, {1, 0, 0}, {1, 1, -1}}, + {StaticShape{1, 2, 3}}); } \ No newline at end of file diff --git a/src/tests/unit/cpu/shape_inference_test/tile_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/tile_shape_inference.cpp index dc758f627e6..c6b79747dfd 100644 --- a/src/tests/unit/cpu/shape_inference_test/tile_shape_inference.cpp +++ b/src/tests/unit/cpu/shape_inference_test/tile_shape_inference.cpp @@ -10,6 +10,7 @@ #include using namespace ov; +using namespace ov::intel_cpu; TEST(StaticShapeInferenceTest, TileTest) { auto param0 = std::make_shared(element::f32, PartialShape{-1, -1, -1}); diff --git a/src/tests/unit/cpu/shape_inference_test/topk_test.cpp b/src/tests/unit/cpu/shape_inference_test/topk_test.cpp index 46571699537..6c9268e976e 100644 --- a/src/tests/unit/cpu/shape_inference_test/topk_test.cpp +++ b/src/tests/unit/cpu/shape_inference_test/topk_test.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; static std::shared_ptr build_topk(PartialShape data_shape = PartialShape::dynamic(), int64_t axis = 1, diff --git a/src/tests/unit/cpu/shape_inference_test/utils.hpp b/src/tests/unit/cpu/shape_inference_test/utils.hpp index 00607122be9..b539679861e 100644 --- a/src/tests/unit/cpu/shape_inference_test/utils.hpp +++ b/src/tests/unit/cpu/shape_inference_test/utils.hpp @@ -13,18 +13,18 @@ struct TestTensor { std::shared_ptr tensor; - ov::StaticShape static_shape; + ov::intel_cpu::StaticShape static_shape; template - TestTensor(std::initializer_list values) : TestTensor(ov::StaticShape({values.size()}), values) {} + TestTensor(std::initializer_list values) : TestTensor(ov::intel_cpu::StaticShape({values.size()}), values) {} template - TestTensor(T scalar) : TestTensor(ov::StaticShape({}), {scalar}) {} + TestTensor(T scalar) : TestTensor(ov::intel_cpu::StaticShape({}), {scalar}) {} - TestTensor(ov::StaticShape shape) : static_shape(shape) {} + TestTensor(ov::intel_cpu::StaticShape shape) : static_shape(shape) {} template - TestTensor(ov::StaticShape shape, std::initializer_list values) { + TestTensor(ov::intel_cpu::StaticShape shape, std::initializer_list values) { static_shape = shape; ov::Shape s; @@ -49,9 +49,9 @@ struct TestTensor { // {Shape{2,2}, {1,2,3,4}} tensor of shape [2,2] and values (1,2,3,4) static void check_static_shape(ov::Node* op, std::initializer_list inputs, - std::initializer_list expect_shapes) { - std::vector output_shapes; - std::vector input_shapes; + std::initializer_list expect_shapes) { + std::vector output_shapes; + std::vector input_shapes; std::map> constData; int index = 0; @@ -62,7 +62,7 @@ static void check_static_shape(ov::Node* op, index++; }); - output_shapes.resize(expect_shapes.size(), ov::StaticShape{}); + output_shapes.resize(expect_shapes.size(), ov::intel_cpu::StaticShape{}); shape_inference(op, input_shapes, output_shapes, constData); diff --git a/src/tests/unit/cpu/shape_inference_test/variadic_split_tests.cpp b/src/tests/unit/cpu/shape_inference_test/variadic_split_tests.cpp index 4057fadf4ed..d73119593c8 100644 --- a/src/tests/unit/cpu/shape_inference_test/variadic_split_tests.cpp +++ b/src/tests/unit/cpu/shape_inference_test/variadic_split_tests.cpp @@ -7,6 +7,7 @@ #include "utils.hpp" using namespace ov; +using namespace ov::intel_cpu; static std::shared_ptr build_variadic_split(PartialShape data_shape, 
                                            std::initializer_list axis_value,
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py
index 78037e8ecd6..fc17da710bd 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/main.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py
@@ -66,7 +66,7 @@ def run(args):
     benchmark = Benchmark(args.target_device, args.number_infer_requests, args.number_iterations,
                           args.time, args.api_type, args.inference_only)
-    ## CPU (MKLDNN) extensions
+    ## CPU (OneDNN) extensions
     if CPU_DEVICE_NAME in device_name and args.path_to_extension:
         benchmark.add_extension(path_to_extension=args.path_to_extension)
diff --git a/tools/cross_check_tool/README.md b/tools/cross_check_tool/README.md
index ee1cbb06ba5..c0c43cb2f9b 100644
--- a/tools/cross_check_tool/README.md
+++ b/tools/cross_check_tool/README.md
@@ -61,7 +61,7 @@ Plugin specific arguments:
   --reference_config REFERENCE_CONFIG, -ref_conf REFERENCE_CONFIG
                         Path to config file for -ref_d or -reference_device device plugin
-  -l L                  Required for MKLDNN (CPU)-targeted custom layers.
+  -l L                  Required for CPU-targeted custom layers.
                         Comma separated paths to a shared libraries with the kernels implementation.
diff --git a/tools/cross_check_tool/openvino/tools/cross_check_tool/utils.py b/tools/cross_check_tool/openvino/tools/cross_check_tool/utils.py
index 21a736954d7..5ca24a3d81f 100644
--- a/tools/cross_check_tool/openvino/tools/cross_check_tool/utils.py
+++ b/tools/cross_check_tool/openvino/tools/cross_check_tool/utils.py
@@ -194,7 +194,7 @@ def build_parser():
     plugin.add_argument('--reference_config', '-ref_conf', type=str, action=ExistingFileAction,
                         help='Path to config file for -ref_d or -reference_device device plugin')
     plugin.add_argument('-l', type=str, action=ExistingFileAction,
-                        help='Required for MKLDNN (CPU)-targeted custom layers. Comma separated paths to a shared'
+                        help='Required for CPU-targeted custom layers. Comma separated paths to a shared'
                         ' libraries with the kernels implementation.')
     modes = parser.add_argument_group('CCT mode arguments')
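
The test changes above all follow one pattern: the MKLDNN*-prefixed helpers become plain class names under the ov::intel_cpu namespace (MKLDNNExtensionUtils -> DnnlExtensionUtils, MKLDNNEdge -> Edge, ov::StaticShape -> ov::intel_cpu::StaticShape, and so on), while the method signatures at the call sites stay the same. The sketch below is a minimal, hypothetical usage example of the new names that mirrors the updated memory-descriptor tests; it assumes the intel_cpu plugin's internal headers (dnnl_extension_utils.h, memory_desc/dnnl_blocked_memory_desc.h) and oneDNN are on the include path, and the function name is illustrative only, not part of this patch.

// Minimal usage sketch of the renamed classes (illustrative only, not part of the patch).
// Assumes the intel_cpu plugin internal headers and oneDNN are available on the include path.
#include <dnnl.hpp>
#include "dnnl_extension_utils.h"
#include "memory_desc/dnnl_blocked_memory_desc.h"

using namespace ov::intel_cpu;   // DnnlExtensionUtils, DnnlMemoryDescPtr, Edge, StaticShape, ... live here now

void check_plain_layout() {      // hypothetical helper, mirrors the MemDescTest cases above
    // oneDNN descriptor for a plain NCHW u8 tensor
    dnnl::memory::desc plain_tdesc {dnnl::memory::dims{2, 3, 4, 5},
                                    dnnl::memory::data_type::u8,
                                    dnnl::memory::format_tag::nchw};

    // Old: MKLDNNExtensionUtils::makeDescriptor(...)  ->  New: DnnlExtensionUtils::makeDescriptor(...)
    DnnlMemoryDescPtr plg_tdesc = DnnlExtensionUtils::makeDescriptor(plain_tdesc);

    // The query API is unchanged; only the class prefixes were dropped.
    bool is_plain = plg_tdesc->hasLayoutType(LayoutType::ncsp);
    (void)is_plain;
}

Downstream code that spells out the old names should only need updated includes and class prefixes; the call signatures exercised by the tests above are otherwise untouched by this patch.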