From e04ca1516dc9c8f8cb4539546f60a9cfc230141c Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 8 Dec 2021 11:03:24 +0300
Subject: [PATCH] [GPU] Plugin refactoring (#9068)

* [GPU] Plugin files renaming

* [GPU] Updated plugin namespace to ov::runtime::intel_gpu

* [GPU] Renamed plugin classes to get rid of cldnn prefix
---
 src/plugins/intel_gpu/CMakeLists.txt          |   8 +-
 .../intel_gpu/plugin/async_infer_request.hpp  |  36 +++
 .../plugin/cldnn_async_infer_request.h        |  32 ---
 .../include/intel_gpu/plugin/cldnn_itt.h      |  20 --
 ...{cldnn_common_utils.h => common_utils.hpp} |  14 +-
 ...xecutable_network.h => compiled_model.hpp} |  22 +-
 ...{cldnn_custom_layer.h => custom_layer.hpp} |  20 +-
 .../{cldnn_config.h => device_config.hpp}     |  12 +-
 .../plugin/{cldnn_graph.h => graph.hpp}       |  24 +-
 ...ldnn_infer_request.h => infer_request.hpp} |  34 +--
 .../include/intel_gpu/plugin/itt.hpp          |  24 ++
 .../plugin/{cldnn_engine.h => plugin.hpp}     |  36 +--
 ...rimitives_list.hpp => primitives_list.hpp} |   4 +-
 .../plugin/{cldnn_program.h => program.hpp}   |  14 +-
 ...nn_remote_context.h => remote_context.hpp} | 219 +++++++++---------
 .../plugin/{simple_math.h => simple_math.hpp} |   0
 ...ipeline.h => transformations_pipeline.hpp} |  10 +-
 ...er_request.cpp => async_infer_request.cpp} |  30 ++-
 ...cutable_network.cpp => compiled_model.cpp} |  90 ++++---
 ...ldnn_custom_layer.cpp => custom_layer.cpp} |  36 +--
 .../{cldnn_config.cpp => device_config.cpp}   |  18 +-
 .../src/plugin/{cldnn_graph.cpp => graph.cpp} |  68 +++---
 ...nn_infer_request.cpp => infer_request.cpp} | 152 ++++++------
 .../src/plugin/ops/batch_to_space.cpp         |  14 +-
 .../intel_gpu/src/plugin/ops/broadcast.cpp    |  16 +-
 .../intel_gpu/src/plugin/ops/concat.cpp       |  14 +-
 .../intel_gpu/src/plugin/ops/constant.cpp     |  12 +-
 .../intel_gpu/src/plugin/ops/convert.cpp      |  12 +-
 .../intel_gpu/src/plugin/ops/convolution.cpp  |  28 ++-
 .../src/plugin/ops/ctc_greedy_decoder.cpp     |  20 +-
 .../intel_gpu/src/plugin/ops/cum_sum.cpp      |  14 +-
 .../intel_gpu/src/plugin/ops/custom.cpp       |  20 +-
 .../src/plugin/ops/depth_to_space.cpp         |  12 +-
 .../src/plugin/ops/detection_output.cpp       |  12 +-
 .../intel_gpu/src/plugin/ops/eltwise.cpp      |  14 +-
 .../src/plugin/ops/embedding_bag.cpp          |  24 +-
 ...mental_detectron_roi_feature_extractor.cpp |  14 +-
 .../src/plugin/ops/extract_image_patches.cpp  |  14 +-
 .../src/plugin/ops/fake_quantize.cpp          |  12 +-
 .../intel_gpu/src/plugin/ops/gather tree.cpp  |  14 +-
 .../intel_gpu/src/plugin/ops/gather.cpp       |  16 +-
 .../src/plugin/ops/gather_elements.cpp        |  16 +-
 .../intel_gpu/src/plugin/ops/gather_nd.cpp    |  12 +-
 src/plugins/intel_gpu/src/plugin/ops/grn.cpp  |  12 +-
 .../intel_gpu/src/plugin/ops/interpolate.cpp  |  16 +-
 src/plugins/intel_gpu/src/plugin/ops/loop.cpp |  16 +-
 src/plugins/intel_gpu/src/plugin/ops/lrn.cpp  |  12 +-
 .../intel_gpu/src/plugin/ops/matmul.cpp       |  18 +-
 src/plugins/intel_gpu/src/plugin/ops/mvn.cpp  |  12 +-
 .../src/plugin/ops/non_max_suppression.cpp    |  18 +-
 .../intel_gpu/src/plugin/ops/normalize_l2.cpp |  12 +-
 .../intel_gpu/src/plugin/ops/one_hot.cpp      |  14 +-
 src/plugins/intel_gpu/src/plugin/ops/pad.cpp  |  12 +-
 .../intel_gpu/src/plugin/ops/parameter.cpp    |  32 +--
 .../intel_gpu/src/plugin/ops/pooling.cpp      |  16 +-
 .../intel_gpu/src/plugin/ops/prior_box.cpp    |  12 +-
 .../intel_gpu/src/plugin/ops/proposal.cpp     |  14 +-
 .../intel_gpu/src/plugin/ops/reduce.cpp       |  12 +-
 .../intel_gpu/src/plugin/ops/region_yolo.cpp  |  12 +-
 .../intel_gpu/src/plugin/ops/reorg_yolo.cpp   |  12 +-
 .../intel_gpu/src/plugin/ops/reshape.cpp      |  14 +-
 .../intel_gpu/src/plugin/ops/result.cpp       |  12 +-
 .../src/plugin/ops/reverse_sequence.cpp       |  12 +-
 src/plugins/intel_gpu/src/plugin/ops/rnn.cpp  |  14 +-
 .../intel_gpu/src/plugin/ops/roi_align.cpp    |  12 +-
 .../intel_gpu/src/plugin/ops/roi_pooling.cpp  |  12 +-
 .../plugin/ops/scatter_elements_update.cpp    |  14 +-
 .../src/plugin/ops/scatter_nd_update.cpp      |  12 +-
 .../src/plugin/ops/scatter_update.cpp         |  14 +-
 .../intel_gpu/src/plugin/ops/select.cpp       |  14 +-
 .../src/plugin/ops/shuffle_channels.cpp       |  12 +-
 .../intel_gpu/src/plugin/ops/softmax.cpp      |  12 +-
 .../src/plugin/ops/space_to_batch.cpp         |  14 +-
 .../src/plugin/ops/space_to_depth.cpp         |  12 +-
 .../intel_gpu/src/plugin/ops/split.cpp        |  16 +-
 .../src/plugin/ops/strided_slice.cpp          |  22 +-
 .../src/plugin/ops/tensor_iterator.cpp        |  17 +-
 src/plugins/intel_gpu/src/plugin/ops/tile.cpp |  14 +-
 src/plugins/intel_gpu/src/plugin/ops/topk.cpp |  14 +-
 .../intel_gpu/src/plugin/ops/transpose.cpp    |  12 +-
 .../intel_gpu/src/plugin/ops/unary.cpp        |  10 +-
 .../plugin/{cldnn_engine.cpp => plugin.cpp}   | 130 ++++++-----
 .../plugin/{cldnn_program.cpp => program.cpp} |  20 +-
 ..._remote_context.cpp => remote_context.cpp} |  62 ++---
 .../intel_gpu/src/plugin/simple_math.cpp      |   2 +-
 ...eline.cpp => transformations_pipeline.cpp} |  18 +-
 86 files changed, 1160 insertions(+), 842 deletions(-)
 create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp
 delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_async_infer_request.h
 delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_itt.h
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_common_utils.h => common_utils.hpp} (96%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_executable_network.h => compiled_model.hpp} (71%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_custom_layer.h => custom_layer.hpp} (85%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_config.h => device_config.hpp} (95%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_graph.h => graph.hpp} (84%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_infer_request.h => infer_request.hpp} (77%)
 create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/itt.hpp
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_engine.h => plugin.hpp} (78%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_primitives_list.hpp => primitives_list.hpp} (99%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_program.h => program.hpp} (97%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_remote_context.h => remote_context.hpp} (68%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{simple_math.h => simple_math.hpp} (100%)
 rename src/plugins/intel_gpu/include/intel_gpu/plugin/{cldnn_transformations_pipeline.h => transformations_pipeline.hpp} (72%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_async_infer_request.cpp => async_infer_request.cpp} (57%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_executable_network.cpp => compiled_model.cpp} (63%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_custom_layer.cpp => custom_layer.cpp} (90%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_config.cpp => device_config.cpp} (98%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_graph.cpp => graph.cpp} (94%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_infer_request.cpp => infer_request.cpp} (88%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_engine.cpp => plugin.cpp} (88%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_program.cpp => program.cpp} (96%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_remote_context.cpp => remote_context.cpp} (86%)
 rename src/plugins/intel_gpu/src/plugin/{cldnn_transformations_pipeline.cpp => transformations_pipeline.cpp} (97%)
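Note on migration: for code that referenced the plugin's internal symbols directly (tests, tooling), the renames below are mechanical — the flat CLDNNPlugin namespace becomes the nested ov::runtime::intel_gpu namespace, headers move under intel_gpu/plugin/ with an .hpp suffix, and the CLDNN/clDNN class prefixes are dropped. A minimal before/after sketch, assuming only the renamed infer_request.hpp header shown later in this patch (the variable name is illustrative):

    // Before: #include "cldnn_infer_request.h"
    //         CLDNNPlugin::CLDNNInferRequest::Ptr request;
    #include "intel_gpu/plugin/infer_request.hpp"

    namespace gpu = ov::runtime::intel_gpu;
    gpu::InferRequest::Ptr request;  // formerly CLDNNPlugin::CLDNNInferRequest::Ptr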
diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt
index 4c05edaef13..e109575acfb 100644
--- a/src/plugins/intel_gpu/CMakeLists.txt
+++ b/src/plugins/intel_gpu/CMakeLists.txt
@@ -16,14 +16,14 @@ if(ENABLE_GPU_DEBUG_CAPS)
     add_definitions(-DGPU_DEBUG_CONFIG=1)
 endif()
 
-file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.h)
+file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.hpp)
 
-addVersionDefines(src/plugin/cldnn_engine.cpp CI_BUILD_NUMBER CLDNN_VERSION)
+addVersionDefines(src/plugin/plugin.cpp CI_BUILD_NUMBER)
 
 ie_add_plugin(NAME ${TARGET_NAME}
               DEVICE_NAME "GPU"
               SOURCES ${PLUGIN_SOURCES}
-              VERSION_DEFINES_FOR src/plugin/cldnn_engine.cpp)
+              VERSION_DEFINES_FOR src/plugin/plugin.cpp)
 
 target_compile_options(${TARGET_NAME} PRIVATE
   $<$:$,/Os,-Os>>)
@@ -35,7 +35,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE ov_intel_gpu_graph ngraph)
 
 target_include_directories(${TARGET_NAME} PRIVATE
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/
     $)
 
 set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp
new file mode 100644
index 00000000000..f864aa365b7
--- /dev/null
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include "intel_gpu/plugin/infer_request.hpp"
+
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+
+class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
+public:
+    using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
+    AsyncInferRequest(const InferRequest::Ptr &inferRequest,
+                      const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
+                      const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
+                      const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
+
+    ~AsyncInferRequest();
+
+    void Infer_ThreadUnsafe() override;
+    void StartAsync_ThreadUnsafe() override;
+
+private:
+    InferRequest::Ptr _inferRequest;
+    InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
+};
+
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_async_infer_request.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_async_infer_request.h
deleted file mode 100644
index d9d90d1db47..00000000000
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_async_infer_request.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include
-#include
-#include
-#include "cldnn_infer_request.h"
-
-namespace CLDNNPlugin {
-
-class CLDNNAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
-public:
-    using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
-    CLDNNAsyncInferRequest(const CLDNNInferRequest::Ptr &inferRequest,
-                           const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
-                           const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
-                           const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
-
-    ~CLDNNAsyncInferRequest();
-
-    void Infer_ThreadUnsafe() override;
-    void StartAsync_ThreadUnsafe() override;
-
-private:
-    CLDNNInferRequest::Ptr _inferRequest;
-    InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
-};
-
-}  // namespace CLDNNPlugin
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_itt.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_itt.h
deleted file mode 100644
index 5070eeb9099..00000000000
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_itt.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
- * @brief Defines openvino domains for tracing
- * @file cldnn_itt.h
- */
-
-#pragma once
-
-#include
-
-namespace CLDNNPlugin {
-namespace itt {
-namespace domains {
-    OV_ITT_DOMAIN(CLDNNPlugin);
-}
-}
-}
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_common_utils.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
similarity index 96%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_common_utils.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
index 8c5e5959c2c..a1ef3160478 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_common_utils.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
@@ -9,11 +9,13 @@
 
 #include "ngraph/type/element_type.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 #define TensorValue(val) static_cast(val)
 
-const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def = 1) {
+inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, int def = 1) {
     switch (dims.size()) {
     case 0: return cldnn::tensor(cldnn::batch(def), cldnn::feature(def), cldnn::spatial(def, def));
     case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
@@ -22,9 +24,9 @@ const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, i
     case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
     case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
     case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
-    default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for clDNN tensor";
+    default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for gpu tensor";
     }
-};
+}
 
 inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
     switch (p) {
@@ -185,4 +187,6 @@ inline std::vector ConvertPermuteOrder(const std::vector& ie
     return cldnn_order;
 }
 
-}  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
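The renamed helper above replaces the CldnnTensorFromIEDims lambda with a plain inline function; it maps InferenceEngine dims (N, C, H, W, ...) onto cldnn::tensor batch/feature/spatial components with the spatial axes reversed. A small usage sketch under those semantics (values are illustrative):

    #include "intel_gpu/plugin/common_utils.hpp"

    // NCHW dims {1, 3, 224, 224} become batch = 1, feature = 3 and
    // spatial = (224, 224); note spatial takes (dims[3], dims[2]), width first.
    cldnn::tensor t = ov::runtime::intel_gpu::tensor_from_dims({1, 3, 224, 224});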
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_executable_network.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp
similarity index 71%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_executable_network.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp
index 7ecb72a738f..e13839e1006 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_executable_network.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/compiled_model.hpp
@@ -13,17 +13,19 @@
 #include "ie_blob.h"
 #include "cpp/ie_cnn_network.h"
 #include
-#include "cldnn_graph.h"
-#include "cldnn_config.h"
-#include "cldnn_remote_context.h"
+#include "intel_gpu/plugin/graph.hpp"
+#include "intel_gpu/plugin/device_config.hpp"
+#include "intel_gpu/plugin/remote_context.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
-class CLDNNExecNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
+class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
 public:
-    typedef std::shared_ptr Ptr;
+    typedef std::shared_ptr Ptr;
 
-    CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config);
+    CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config);
 
     std::shared_ptr GetExecGraphInfo() override;
     InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
@@ -36,11 +38,13 @@ public:
     InferenceEngine::Parameter GetConfig(const std::string &name) const override;
     std::shared_ptr GetContext() const override;
 
-    std::vector> m_graphs;
+    std::vector> m_graphs;
     InferenceEngine::gpu::ClContext::Ptr m_context;
     Config m_config;
     InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
     InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
 };
 
-};  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_custom_layer.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/custom_layer.hpp
similarity index 85%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_custom_layer.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/custom_layer.hpp
index 39bc74282a0..fcc1bc804f3 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_custom_layer.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/custom_layer.hpp
@@ -12,15 +12,17 @@
 #include "pugixml.hpp"
 #include "intel_gpu/runtime/tensor.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
-using CLDNNCustomLayerPtr = std::shared_ptr;
-using CLDNNCustomLayerMap = std::map;
-class CLDNNCustomLayer{
+using CustomLayerPtr = std::shared_ptr;
+using CustomLayerMap = std::map;
+class CustomLayer{
 public:
     static void LoadFromFile(
         const std::string configFile,
-        CLDNNCustomLayerMap& customLayers,
+        CustomLayerMap& customLayers,
         bool can_be_missed = false);
 
     typedef enum {
@@ -57,8 +59,8 @@ public:
     int InputDimSourceIndex() { return m_wgDimInputIdx; }
 
 protected:
-    CLDNNCustomLayer() : m_wgDimInputIdx(0) {}
-    explicit CLDNNCustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
+    CustomLayer() : m_wgDimInputIdx(0) {}
+    explicit CustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
 
     bool Error() const { return m_ErrorMessage.length() > 0; }
     void LoadSingleLayer(const pugi::xml_node& node);
@@ -82,4 +84,6 @@ protected:
     std::string m_ErrorMessage;
 };
 
-};  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
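CustomLayer keeps the XML-driven flow of the old CLDNNCustomLayer; only the type names change. A hedged caller sketch based on the header above (the config file path is hypothetical):

    #include "intel_gpu/plugin/custom_layer.hpp"

    using namespace ov::runtime::intel_gpu;

    // Parse a custom-layer config into the map consumed by the plugin Config;
    // with can_be_missed = true a missing file is tolerated instead of failing.
    CustomLayerMap layers;
    CustomLayer::LoadFromFile("custom_layers.xml", layers, /*can_be_missed=*/true);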
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_config.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp
similarity index 95%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_config.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp
index 202920bef95..a20afc8b973 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_config.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/device_config.hpp
@@ -7,12 +7,14 @@
 #include
 #include
-#include "cldnn_custom_layer.h"
+#include "intel_gpu/plugin/custom_layer.hpp"
 #include
 #include "intel_gpu/graph/network.hpp"
 #include
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 struct Config {
     Config(std::string device_id = "0") : device_id(device_id),
@@ -61,7 +63,7 @@ struct Config {
     cldnn::priority_mode_types queuePriority;
     cldnn::throttle_mode_types queueThrottle;
     int max_dynamic_batch;
-    CLDNNCustomLayerMap customLayers;
+    CustomLayerMap customLayers;
     cldnn::tuning_config_options tuningConfig;
     std::string graph_dumps_dir;
     std::string sources_dumps_dir;
@@ -93,4 +95,6 @@ private:
     std::map configs;
 };
 
-}  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_graph.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
similarity index 84%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_graph.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
index 6109898b0b1..fd51e38c483 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_graph.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/graph.hpp
@@ -21,24 +21,26 @@
 #include "intel_gpu/graph/topology.hpp"
 #include
-#include "cldnn_custom_layer.h"
-#include "cldnn_config.h"
-#include "cldnn_remote_context.h"
-#include "cldnn_program.h"
+#include "intel_gpu/plugin/custom_layer.hpp"
+#include "intel_gpu/plugin/device_config.hpp"
+#include "intel_gpu/plugin/remote_context.hpp"
+#include "intel_gpu/plugin/program.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
-class CLDNNGraph {
+class Graph {
 public:
     enum class Stage : uint32_t {
         PREPROC = 1,
         EXECUTE = 2,
         POSTPROC = 4
     };
-    typedef std::shared_ptr Ptr;
+    typedef std::shared_ptr Ptr;
 
-    CLDNNGraph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
-    explicit CLDNNGraph(std::shared_ptr graph, uint16_t stream_id = 0);
+    Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
+    explicit Graph(std::shared_ptr graph, uint16_t stream_id = 0);
 
     std::shared_ptr GetExecGraphInfo();
     bool IsLoaded() const;
@@ -102,4 +104,6 @@ protected:
                                  bool filter_const_primitives = true);
 };
 
-}  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_infer_request.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp
similarity index 77%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_infer_request.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp
index ae152855b45..d5aeacf95c3 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_infer_request.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/infer_request.hpp
@@ -9,21 +9,23 @@
 #include
 #include
 #include
-#include "cldnn_graph.h"
+#include "intel_gpu/plugin/graph.hpp"
 #include
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 struct buf_info {
     size_t buf_offset;
     size_t buf_size;
 };
 
-class CLDNNExecNetwork;
+class CompiledModel;
 
-class CLDNNInferRequest : public InferenceEngine::IInferRequestInternal {
+class InferRequest : public InferenceEngine::IInferRequestInternal {
 public:
-    using Ptr = std::shared_ptr;
+    using Ptr = std::shared_ptr;
     // make sure all blobs and cldnn::memory objects
     // are in place and valid
     void checkBlobs() override;
@@ -31,21 +33,21 @@ public:
     std::map GetPerformanceCounts() const override;
 
-    CLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs,
-                      const std::shared_ptr& execNetwork);
-    CLDNNInferRequest(const std::vector>& inputs,
-                      const std::vector>& outputs,
-                      const std::shared_ptr& execNetwork);
+    InferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs,
+                 const std::shared_ptr& execNetwork);
+    InferRequest(const std::vector>& inputs,
+                 const std::vector>& outputs,
+                 const std::shared_ptr& execNetwork);
 
-    CLDNNInferRequest(const CLDNNInferRequest &) = delete;
+    InferRequest(const InferRequest &) = delete;
 
-    virtual ~CLDNNInferRequest() = default;
+    virtual ~InferRequest() = default;
 
     InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
     void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
 
     void SetBatch(int batch = -1) override;
-    void SetGraph(std::shared_ptr graph);
+    void SetGraph(std::shared_ptr graph);
     void EnableProfiling() { m_useProfiling = true; }
     void EnableStreams() { m_useStreams = true; }
 
@@ -73,7 +75,7 @@ private:
     bool m_useProfiling = false;
     bool m_useStreams = false;
     bool m_useExternalQueue = false;
-    std::shared_ptr m_graph;
+    std::shared_ptr m_graph;
 
     // dynamic batch stuff
     std::map> batchInputs;
@@ -102,4 +104,6 @@ private:
     std::vector> internal_outputs_dynamic;
 };
 
-};  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/itt.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/itt.hpp
new file mode 100644
index 00000000000..ba31ab5b7ed
--- /dev/null
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/itt.hpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief Defines openvino domains for tracing
+ * @file itt.hpp
+ */
+
+#pragma once
+
+#include
+
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+namespace itt {
+namespace domains {
+    OV_ITT_DOMAIN(intel_gpu_plugin);
+}  // namespace domains
+}  // namespace itt
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
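The new header only renames the tracing domain (CLDNNPlugin -> intel_gpu_plugin); call sites keep using OV_ITT_SCOPED_TASK, as the updated sources later in this patch show. A minimal sketch (the function is illustrative):

    #include "intel_gpu/plugin/itt.hpp"

    void plugin_entry_point() {
        // Scoped ITT task attributed to the renamed tracing domain.
        OV_ITT_SCOPED_TASK(ov::runtime::intel_gpu::itt::domains::intel_gpu_plugin,
                           "plugin_entry_point");
    }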
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_engine.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
similarity index 78%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_engine.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
index ed2270fa17f..52f4091202b 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_engine.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
@@ -10,14 +10,16 @@
 #include "intel_gpu/runtime/engine.hpp"
 #include
 #include
-#include "cldnn_remote_context.h"
+#include "intel_gpu/plugin/remote_context.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
-using CLDNNCustomLayerPtr = std::shared_ptr;
+using CustomLayerPtr = std::shared_ptr;
 
-class clDNNEngine : public InferenceEngine::IInferencePlugin,
-                    public InferenceEngine::gpu::details::param_map_obj_getter {
+class Plugin : public InferenceEngine::IInferencePlugin,
+               public InferenceEngine::gpu::details::param_map_obj_getter {
     struct impl;
     std::shared_ptr _impl;
     bool streamsSet = false;
@@ -26,23 +28,23 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin,
     // key: device_id, value: cldnn device
     std::map device_map;
     // key: cldnn context, value: memory statistics
-    mutable std::map> statistics_map;
+    mutable std::map> statistics_map;
     mutable std::mutex engine_mutex;
 
-    mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
+    mutable RemoteCLContext::Ptr m_defaultContext;
 
     cldnn::device_info GetDeviceInfo(const std::map &config) const;
     InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
-                                                         const CLDNNPlugin::Config& config) const;
+                                                         const Config& config) const;
 
     std::map ConvertPerfHintsToConfig(const std::map& network_config,
-                                      const CLDNNPlugin::Config& plugin_config) const;
+                                      const Config& plugin_config) const;
 
     void RegisterPrimitives();
     void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map &params) const;
-    void UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const;
+    void UpdateStatistics(const RemoteCLContext::Ptr& context) const;
 
 public:
-    clDNNEngine();
+    Plugin();
 
     InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                                         const std::map &config) override;
@@ -61,7 +63,7 @@ public:
     std::shared_ptr CreateContext(const InferenceEngine::ParamMap& params) override;
     std::shared_ptr GetDefaultContext(const InferenceEngine::ParamMap& params) override;
 
-    struct clDNNEngineParams {
+    struct PluginParams {
         cldnn::queue_types queue_type;
         cldnn::engine_types engine_type;
         cldnn::runtime_types runtime_type;
@@ -69,9 +71,9 @@
         InferenceEngine::ITaskExecutor::Ptr task_executor;
     };
 
-    static clDNNEngineParams GetEngineParams(const Config& config, const cldnn::device::ptr& dev,
-                                             InferenceEngine::gpu_handle_param external_queue = nullptr) {
-        clDNNEngineParams params;
+    static PluginParams GetParams(const Config& config, const cldnn::device::ptr& dev,
+                                  InferenceEngine::gpu_handle_param external_queue = nullptr) {
+        PluginParams params;
         params.engine_type = cldnn::engine_types::ocl;
         params.runtime_type = cldnn::runtime_types::ocl;
         if (external_queue) {
@@ -87,4 +89,6 @@ public:
     }
 };
 
-};  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
similarity index 99%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_primitives_list.hpp
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
index 15d7df5fd98..3becbec9ffb 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_primitives_list.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
@@ -61,6 +61,7 @@ REGISTER_FACTORY(v0, Squeeze);
 REGISTER_FACTORY(v0, ShuffleChannels);
 REGISTER_FACTORY(v0, Tan);
 REGISTER_FACTORY(v0, Tanh);
+REGISTER_FACTORY(v0, TensorIterator);
 REGISTER_FACTORY(v0, Tile);
 REGISTER_FACTORY(v0, Unsqueeze);
 
@@ -88,7 +89,6 @@ REGISTER_FACTORY(v0, Unsqueeze);
 // REGISTER_FACTORY(v0, Range);
 // REGISTER_FACTORY(v0, RNNCell);
 // REGISTER_FACTORY(v0, ShapeOf);
-REGISTER_FACTORY(v0, TensorIterator);
 
 // ------------------------------ Supported v1 ops ------------------------------ //
 REGISTER_FACTORY(v1, Add);
@@ -156,6 +156,7 @@ REGISTER_FACTORY(v3, EmbeddingBagOffsetsSum);
 REGISTER_FACTORY(v3, EmbeddingBagPackedSum);
 REGISTER_FACTORY(v3, EmbeddingSegmentsSum);
 REGISTER_FACTORY(v3, ExtractImagePatches);
+REGISTER_FACTORY(v3, ROIAlign);
 REGISTER_FACTORY(v3, ScatterUpdate);
 REGISTER_FACTORY(v3, ScatterElementsUpdate);
 REGISTER_FACTORY(v3, ScatterNDUpdate);
@@ -166,7 +167,6 @@ REGISTER_FACTORY(v3, ScatterNDUpdate);
 // REGISTER_FACTORY(v3, Bucketize);
 // REGISTER_FACTORY(v3, GRUCell);
 // REGISTER_FACTORY(v3, NonZero);
-REGISTER_FACTORY(v3, ROIAlign);
 // REGISTER_FACTORY(v3, ReadValue);
 // REGISTER_FACTORY(v3, ShapeOf);
 // REGISTER_FACTORY(v3, TopK);
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_program.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
similarity index 97%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_program.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
index e67c0574763..e365826653e 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_program.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
@@ -15,7 +15,7 @@
 #include
 #include
 
-#include "cldnn_config.h"
+#include "intel_gpu/plugin/device_config.hpp"
 
 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/graph/topology.hpp"
@@ -40,7 +40,9 @@ void __register ## _ ## op_name ## _ ## op_version() {
     });                                                                               \
 }
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 std::string layer_type_lower(const ngraph::Node* op);
 std::string layer_type_name_ID(const ngraph::Node* op);
@@ -135,7 +137,7 @@ public:
     template
     void AddPrimitive(PType prim) {
         if (m_topology == nullptr) {
-            IE_THROW() << "m_topology object was not created in clDNNPlugin::Program";
+            IE_THROW() << "m_topology object was not created in ov::runtime::intel_gpu::Program";
         }
 
         m_topology->add(prim);
@@ -172,11 +174,13 @@ private:
     void ChangeInputBatch(int batch);
 };
 
-void CreateCustomOp(Program& p, const std::shared_ptr& node, CLDNNCustomLayerPtr customLayer);
+void CreateCustomOp(Program& p, const std::shared_ptr& node, CustomLayerPtr customLayer);
 void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr& node,
                           cldnn::activation_func func, cldnn::activation_additional_params params);
 void CreateElementwiseOp(Program& p, const std::shared_ptr& node, cldnn::eltwise_mode mode);
 
 bool IsNodeOnConstPath(const std::shared_ptr& node);
 
-}  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_remote_context.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp
similarity index 68%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_remote_context.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp
index 6e7fd786b10..8f00b37bd97 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_remote_context.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp
@@ -6,12 +6,13 @@
 
 #include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/engine.hpp"
+#include "intel_gpu/plugin/device_config.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
+
 #include
 #include
 #include
 #include
-#include "cldnn_config.h"
-#include "cldnn_common_utils.h"
 
 #ifndef NOMINMAX
 # define NOMINMAX
@@ -28,11 +29,13 @@
 #include
 #include
 
-namespace CLDNNPlugin {
-class CLDNNRemoteAllocator;
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+class RemoteAllocator;
 
-class CLDNNRemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
-    friend class CLDNNRemoteAllocator;
+class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
+    friend class RemoteAllocator;
 
 public:
     enum BlobType {
         BT_EMPTY,
@@ -46,13 +49,13 @@ public:
         BT_DX_BUF_SHARED,
     };
 
-    explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
-                                 cldnn::stream& stream,
-                                 const cldnn::layout& layout,
-                                 cldnn::shared_handle mem = nullptr,
-                                 cldnn::shared_surface surf = 0,
-                                 uint32_t plane = 0,
-                                 BlobType mem_type = BT_BUF_INTERNAL);
+    explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
+                            cldnn::stream& stream,
+                            const cldnn::layout& layout,
+                            cldnn::shared_handle mem = nullptr,
+                            cldnn::shared_surface surf = 0,
+                            uint32_t plane = 0,
+                            BlobType mem_type = BT_BUF_INTERNAL);
 
     void allocate();
     bool deallocate() noexcept;
@@ -72,7 +75,7 @@ public:
     cldnn::memory::ptr getMemory() { return m_memObject; }
 
 protected:
-    static CLDNNRemoteAllocator m_allocator;
+    static RemoteAllocator m_allocator;
     std::weak_ptr m_context;
     cldnn::stream& m_stream;
@@ -95,18 +98,18 @@ protected:
 };
 
 template
-class typedCLDNNRemoteBlob : public TpublicAPI {
+class TypedRemoteBlob : public TpublicAPI {
 public:
-    using Ptr = std::shared_ptr;
+    using Ptr = std::shared_ptr;
 
-    explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
-                                  cldnn::stream& stream,
-                                  const InferenceEngine::TensorDesc& desc,
-                                  const cldnn::layout& layout,
-                                  cldnn::shared_handle mem = nullptr,
-                                  cldnn::shared_surface surf = 0,
-                                  uint32_t plane = 0,
-                                  CLDNNRemoteBlobImpl::BlobType mem_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
+    explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
+                             cldnn::stream& stream,
+                             const InferenceEngine::TensorDesc& desc,
+                             const cldnn::layout& layout,
+                             cldnn::shared_handle mem = nullptr,
+                             cldnn::shared_surface surf = 0,
+                             uint32_t plane = 0,
+                             RemoteBlobImpl::BlobType mem_type = RemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
         : _impl(context, stream, layout, mem, surf, plane, mem_type)
         , TpublicAPI(desc) {}
 
@@ -124,62 +127,62 @@ public:
     InferenceEngine::LockedMemory rwmap() noexcept override { return _impl.rwmap(); }
     InferenceEngine::LockedMemory rmap() const noexcept override { return _impl.rmap(); }
     InferenceEngine::LockedMemory wmap()noexcept override { return _impl.wmap(); }
-    CLDNNRemoteBlobImpl* getImpl() { return &_impl; }
+    RemoteBlobImpl* getImpl() { return &_impl; }
 
 protected:
     const std::shared_ptr &getAllocator() const noexcept override { return _impl.getAllocator(); }
     void *getHandle() const noexcept override { return _impl.getHandle(); }
-    CLDNNRemoteBlobImpl _impl;
+    RemoteBlobImpl _impl;
 };
 
-using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob;
-using CLDNNRemoteUSMbuffer = typedCLDNNRemoteBlob;
-using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob;
+using RemoteCLbuffer = TypedRemoteBlob;
+using RemoteUSMbuffer = TypedRemoteBlob;
+using RemoteCLImage2D = TypedRemoteBlob;
 #ifdef _WIN32
-using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob;
-using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob;
+using RemoteD3DBuffer = TypedRemoteBlob;
+using RemoteD3DSurface = TypedRemoteBlob;
#else
-using CLDNNRemoteVASurface = typedCLDNNRemoteBlob;
+using RemoteVASurface = TypedRemoteBlob;
 #endif
 
-inline CLDNNRemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
+inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
 #ifdef _WIN32
     {
-        auto ptr = blobPtr->as();
+        auto ptr = blobPtr->as();
         if (ptr) return ptr->getImpl();
     }
     {
-        auto ptr = blobPtr->as();
+        auto ptr = blobPtr->as();
         if (ptr) return ptr->getImpl();
     }
 #else
     {
-        auto ptr = blobPtr->as();
+        auto ptr = blobPtr->as();
         if (ptr) return ptr->getImpl();
     }
 #endif
     {
-        auto ptr = blobPtr->as();
+        auto ptr = blobPtr->as();
         if (ptr) return ptr->getImpl();
     }
     {
-        auto ptr = blobPtr->as();
+        auto ptr = blobPtr->as();
         if (ptr) return ptr->getImpl();
     }
     {
-        auto ptr = blobPtr->as();
+        auto ptr = blobPtr->as();
         if (ptr) return ptr->getImpl();
     }
     return nullptr;
 }
 
-class CLDNNRemoteAllocator : public InferenceEngine::IAllocator {
+class RemoteAllocator : public InferenceEngine::IAllocator {
 protected:
-    friend class CLDNNRemoteBlobImpl;
+    friend class RemoteBlobImpl;
     std::atomic_flag _lock;
-    std::map m_lockedBlobs;
+    std::map m_lockedBlobs;
 
-    void regLockedBlob(void* handle, const CLDNNRemoteBlobImpl* blob);
+    void regLockedBlob(void* handle, const RemoteBlobImpl* blob);
 
     void acquire_lock() {
         while (_lock.test_and_set(std::memory_order_acquire)) {}
@@ -190,9 +193,9 @@ protected:
     }
 
 public:
-    using Ptr = std::shared_ptr;
+    using Ptr = std::shared_ptr;
 
-    CLDNNRemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
+    RemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
     /**
     * @brief Maps handle to heap memory accessible by any memory manipulation routines.
    * @return Generic pointer to memory
    */
@@ -269,19 +272,19 @@ public:
 };
 
-class CLDNNExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
+class ExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
 public:
     enum ContextType {
         OCL,
         DEV_SHARED
     };
 
-    using Ptr = std::shared_ptr;
-    using CPtr = std::shared_ptr;
+    using Ptr = std::shared_ptr;
+    using CPtr = std::shared_ptr;
 
-    explicit CLDNNExecutionContextImpl(std::shared_ptr plugin,
-                                       const InferenceEngine::ParamMap& params,
-                                       const Config& config = {});
+    explicit ExecutionContextImpl(std::shared_ptr plugin,
+                                  const InferenceEngine::ParamMap& params,
+                                  const Config& config = {});
 
     InferenceEngine::ParamMap getParams() const;
     std::string getDeviceName() const noexcept;
@@ -313,7 +316,7 @@ protected:
 };
 
 template
-class typedCLDNNExecutionContext : public TpublicContextAPI {
+class TypedExecutionContext : public TpublicContextAPI {
     template
     struct _Key {
         T1 _surf;
@@ -357,17 +360,17 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
             // unlickily, not found - create new and insert into registry
             cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
                                  ImageFormatFromLayout(tensorDesc.getLayout()),
-                                 CldnnTensorFromIEDims(tensorDesc.getDims()));
+                                 tensor_from_dims(tensorDesc.getDims()));
             auto smart_this = std::dynamic_pointer_cast(this->shared_from_this());
 #ifdef _WIN32
-            ret = std::make_shared(smart_this, stream,
+            ret = std::make_shared(smart_this, stream,
                                    tensorDesc, layout, mem, 0, plane,
-                                   CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
+                                   RemoteBlobImpl::BlobType::BT_SURF_SHARED);
 #else
-            ret = std::make_shared(smart_this, stream,
+            ret = std::make_shared(smart_this, stream,
                                    tensorDesc, layout, nullptr, surf, plane,
-                                   CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
+                                   RemoteBlobImpl::BlobType::BT_SURF_SHARED);
 #endif
             shared_surf_reg[skey] = ret;
         }
@@ -378,7 +381,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
 
     InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc,
                                                cldnn::shared_handle mem,
-                                               CLDNNRemoteBlobImpl::BlobType blob_type) {
+                                               RemoteBlobImpl::BlobType blob_type) {
         InferenceEngine::RemoteBlob::Ptr ret = nullptr;
 
         _impl.acquire_lock();
@@ -392,24 +395,24 @@
             // unlickily, not found - create new and insert into registry
             cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
                                  FormatFromLayout(tensorDesc.getLayout()),
-                                 CldnnTensorFromIEDims(tensorDesc.getDims()));
+                                 tensor_from_dims(tensorDesc.getDims()));
             auto smart_this = std::dynamic_pointer_cast(this->shared_from_this());
 
             switch (blob_type) {
-            case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
-                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
+            case RemoteBlobImpl::BlobType::BT_BUF_SHARED:
+                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
                 break;
-            case CLDNNRemoteBlobImpl::BlobType::BT_USM_SHARED:
-                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
+            case RemoteBlobImpl::BlobType::BT_USM_SHARED:
+                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
                 break;
-            case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
+            case RemoteBlobImpl::BlobType::BT_IMG_SHARED:
                 layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
-                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
+                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
                 break;
 #ifdef _WIN32
-            case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
-                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
+            case RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
+                ret = std::make_shared(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
                 break;
 #endif
             default:
@@ -425,44 +428,44 @@
 
     InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) {
         cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
                              FormatFromLayout(tensorDesc.getLayout()),
-                             CldnnTensorFromIEDims(tensorDesc.getDims()));
+                             tensor_from_dims(tensorDesc.getDims()));
         auto smart_this = std::dynamic_pointer_cast(this->shared_from_this());
         auto& stream = _impl.GetEngine()->get_program_stream();
-        return std::make_shared(smart_this,
-                                stream,
-                                tensorDesc,
-                                layout,
-                                nullptr, 0, 0,
-                                CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
+        return std::make_shared(smart_this,
+                                stream,
+                                tensorDesc,
+                                layout,
+                                nullptr, 0, 0,
+                                RemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
     }
 
-    InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, CLDNNRemoteBlobImpl::BlobType alloc_type) {
+    InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, RemoteBlobImpl::BlobType alloc_type) {
         cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
                              FormatFromLayout(tensorDesc.getLayout()),
-                             CldnnTensorFromIEDims(tensorDesc.getDims()));
+                             tensor_from_dims(tensorDesc.getDims()));
         auto smart_this = std::dynamic_pointer_cast(this->shared_from_this());
         auto& stream = _impl.GetEngine()->get_program_stream();
-        return std::make_shared(smart_this,
-                                stream,
-                                tensorDesc,
-                                layout,
-                                nullptr, 0, 0,
-                                alloc_type);
+        return std::make_shared(smart_this,
+                                stream,
+                                tensorDesc,
+                                layout,
+                                nullptr, 0, 0,
+                                alloc_type);
     }
 
     void check_if_shared() {
-        if (GetType() != CLDNNExecutionContextImpl::ContextType::DEV_SHARED)
+        if (GetType() != ExecutionContextImpl::ContextType::DEV_SHARED)
             IE_THROW() << "Shared context is required to to share this type of memory";
     }
 
 public:
-    using Ptr = std::shared_ptr;
-    using CPtr = std::shared_ptr;
+    using Ptr = std::shared_ptr;
+    using CPtr = std::shared_ptr;
 
-    explicit typedCLDNNExecutionContext(std::shared_ptr plugin,
-                                        const InferenceEngine::ParamMap& params,
-                                        const Config& config = {})
+    explicit TypedExecutionContext(std::shared_ptr plugin,
+                                   const InferenceEngine::ParamMap& params,
+                                   const Config& config = {})
         : _impl(plugin, params, config) {}
 
     InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
@@ -479,7 +482,7 @@ public:
         using namespace InferenceEngine;
         using InferenceEngine::gpu::details::param_map_obj_getter;
         if (params.empty()) {
-            // user wants clDNN to allocate blob by itself and return handle
+            // user wants plugin to allocate blob by itself and return handle
             return create_buffer(tensorDesc);
         } else {
            // user will supply shared object handle
@@ -497,25 +500,25 @@ public:
                 check_if_shared();
                 return reuse_surf(tensorDesc, params);
             } else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == memTypeStr) {
-                return create_usm(tensorDesc, CLDNNRemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
+                return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
             } else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == memTypeStr) {
-                return create_usm(tensorDesc, CLDNNRemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
+                return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
             } else {
-                CLDNNRemoteBlobImpl::BlobType blob_type;
+                RemoteBlobImpl::BlobType blob_type;
                 cldnn::shared_handle mem = nullptr;
 
                 if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) {
-                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED;
+                    blob_type = RemoteBlobImpl::BlobType::BT_BUF_SHARED;
                     mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE));
                 } else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == memTypeStr) {
-                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_USM_SHARED;
+                    blob_type = RemoteBlobImpl::BlobType::BT_USM_SHARED;
                     mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE));
                 } else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) {
-                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED;
+                    blob_type = RemoteBlobImpl::BlobType::BT_IMG_SHARED;
                     mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(MEM_HANDLE));
 #ifdef _WIN32
                 } else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) {
-                    blob_type = CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
+                    blob_type = RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
                     mem = param_map_obj_getter::_ObjFromParamSimple(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
                     check_if_shared();
 #endif
@@ -529,38 +532,40 @@
     }
 
     Config& GetConfig() { return _impl.GetConfig(); }
-    CLDNNExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
+    ExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
 
-    CLDNNExecutionContextImpl* getImpl() { return &_impl; }
+    ExecutionContextImpl* getImpl() { return &_impl; }
 
 protected:
-    CLDNNExecutionContextImpl _impl;
+    ExecutionContextImpl _impl;
 };
 
-using CLDNNRemoteCLContext = typedCLDNNExecutionContext;
+using RemoteCLContext = TypedExecutionContext;
 #ifdef _WIN32
-using CLDNNRemoteD3DContext = typedCLDNNExecutionContext;
+using RemoteD3DContext = TypedExecutionContext;
 #else
-using CLDNNRemoteVAContext = typedCLDNNExecutionContext;
+using RemoteVAContext = TypedExecutionContext;
 #endif
 
-inline CLDNNExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
+inline ExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
 #ifdef _WIN32
     {
-        auto ptr = ctxPtr->as();
+        auto ptr = ctxPtr->as();
         if (ptr) return ptr->getImpl();
     }
 #else
     {
-        auto ptr = ctxPtr->as();
+        auto ptr = ctxPtr->as();
         if (ptr) return ptr->getImpl();
     }
 #endif
     {
-        auto ptr = ctxPtr->as();
+        auto ptr = ctxPtr->as();
         if (ptr) return ptr->getImpl();
     }
     return nullptr;
 }
 
-}  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
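The CreateBlob dispatch above preserves the public GPU remote-blob parameter protocol; only the implementation classes behind it are renamed. An application-side sketch using the standard InferenceEngine GPU param keys (ctx, desc and ocl_buf are assumed to already exist):

    // Wrap an existing cl_mem in a remote blob; the context routes this through
    // reuse_obj() with RemoteBlobImpl::BlobType::BT_BUF_SHARED.
    InferenceEngine::ParamMap params = {
        { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
        { GPU_PARAM_KEY(MEM_HANDLE),      static_cast<InferenceEngine::gpu_handle_param>(ocl_buf) }
    };
    InferenceEngine::RemoteBlob::Ptr blob = ctx->CreateBlob(desc, params);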
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/simple_math.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/simple_math.hpp
similarity index 100%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/simple_math.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/simple_math.hpp
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_transformations_pipeline.h b/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp
similarity index 72%
rename from src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_transformations_pipeline.h
rename to src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp
index 2638af7add1..2fe7bc0b29d 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/cldnn_transformations_pipeline.h
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp
@@ -8,9 +8,11 @@
 
 #include
 
-#include "cldnn_config.h"
+#include "intel_gpu/plugin/device_config.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 class TransformationsPipeline {
 public:
@@ -23,4 +25,6 @@ private:
     cldnn::device_info device_info;
 };
 
-}  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_async_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp
similarity index 57%
rename from src/plugins/intel_gpu/src/plugin/cldnn_async_infer_request.cpp
rename to src/plugins/intel_gpu/src/plugin/async_infer_request.cpp
index c6c5faf11c4..fc2031de48b 100644
--- a/src/plugins/intel_gpu/src/plugin/cldnn_async_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp
@@ -2,21 +2,25 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "cldnn_async_infer_request.h"
-#include "cldnn_itt.h"
+#include "intel_gpu/plugin/async_infer_request.hpp"
+#include "intel_gpu/plugin/itt.hpp"
 #include
 
-CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const CLDNNInferRequest::Ptr &inferRequest,
-                                                            const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
-                                                            const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
-                                                            const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+
+AsyncInferRequest::AsyncInferRequest(const InferRequest::Ptr &inferRequest,
+                                     const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
+                                     const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
+                                     const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
     : AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) {
     _pipeline = {};
 
     if (!_inferRequest->use_external_queue()) {
         _pipeline.push_back({taskExecutor,
                     [this] {
-                        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::PreprocessingAndStartPipeline");
+                        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline");
                         _inferRequest->setup_stream_graph();
                         _inferRequest->preprocess();
                         _inferRequest->enqueue();
@@ -25,13 +29,13 @@
     } else {
         _pipeline.push_back({ _waitExecutor,
                         [this] {
-                            OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::WaitPipeline");
+                            OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline");
                             _inferRequest->wait_notify();
                         } });
     }
 }
 
-void CLDNNPlugin::CLDNNAsyncInferRequest::Infer_ThreadUnsafe() {
+void AsyncInferRequest::Infer_ThreadUnsafe() {
     if (_inferRequest->use_external_queue()) {
         _inferRequest->setup_stream_graph();
         _inferRequest->preprocess_notify();
@@ -40,7 +44,7 @@
     Parent::Infer_ThreadUnsafe();
 }
 
-void CLDNNPlugin::CLDNNAsyncInferRequest::StartAsync_ThreadUnsafe() {
+void AsyncInferRequest::StartAsync_ThreadUnsafe() {
     if (_inferRequest->use_external_queue()) {
         _inferRequest->setup_stream_graph();
         _inferRequest->preprocess_notify();
@@ -49,6 +53,10 @@
     Parent::StartAsync_ThreadUnsafe();
 }
 
-CLDNNPlugin::CLDNNAsyncInferRequest::~CLDNNAsyncInferRequest() {
+AsyncInferRequest::~AsyncInferRequest() {
     StopAndWait();
 }
+
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_executable_network.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
similarity index 63%
rename from src/plugins/intel_gpu/src/plugin/cldnn_executable_network.cpp
rename to src/plugins/intel_gpu/src/plugin/compiled_model.cpp
index 8c1eeef3e71..4b8c032a917 100644
--- a/src/plugins/intel_gpu/src/plugin/cldnn_executable_network.cpp
+++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -2,48 +2,44 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-
 #include "ie_metric_helpers.hpp"
-#include
-#include
-#include
-
-#include "ie_metric_helpers.hpp"
-#include
-#include
-#include
-#include "cldnn_graph.h"
-#include "cldnn_itt.h"
+#include "intel_gpu/plugin/graph.hpp"
+#include "intel_gpu/plugin/itt.hpp"
+#include "intel_gpu/plugin/infer_request.hpp"
+#include "intel_gpu/plugin/compiled_model.hpp"
+#include "intel_gpu/plugin/async_infer_request.hpp"
 #include
-#include "cldnn_infer_request.h"
 #include
-#include "cldnn_async_infer_request.h"
-#include
-#include
-#include
-
-#include "cldnn_executable_network.h"
 #include "threading/ie_cpu_streams_executor.hpp"
 #include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
 #include "ie_icore.hpp"
 
+#include
+#include
+#include
+#include
+#include
+#include
+
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
-CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config) :
-    InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]()->InferenceEngine::ITaskExecutor::Ptr {
+CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr context, Config config) :
+    InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
         if (config.exclusiveAsyncRequests) {
             //exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
             return ExecutorManager::getInstance()->getExecutor("GPU");
         } else if (config.throughput_streams > 1) {
             return std::make_shared(
-                IStreamsExecutor::Config{"CLDNNPlugin executor", config.throughput_streams});
+                IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams});
         } else {
             return std::make_shared(
-                IStreamsExecutor::Config{"CLDNNPlugin executor", 1});
+                IStreamsExecutor::Config{"Intel GPU plugin executor", 1});
         }
     }()},
    m_config(config),
@@ -57,18 +53,18 @@
 
     m_context = casted_context;
 
-    auto graph_base = std::make_shared(network, m_context, m_config, 0);
+    auto graph_base = std::make_shared(network, m_context, m_config, 0);
     for (uint16_t n = 0; n < m_config.throughput_streams; n++) {
-        auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n);
+        auto graph = n == 0 ? graph_base : std::make_shared(graph_base, n);
         m_graphs.push_back(graph);
     }
 }
 
-IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
-                                                                    OutputsDataMap networkOutputs) {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
-    auto ptr = std::make_shared(networkInputs, networkOutputs,
-                                std::static_pointer_cast(shared_from_this()));
+IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(InputsDataMap networkInputs,
+                                                                 OutputsDataMap networkOutputs) {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
+    auto ptr = std::make_shared(networkInputs, networkOutputs,
+                                std::static_pointer_cast(shared_from_this()));
     if (m_config.throughput_streams > 1) {
         ptr->EnableStreams();
     }
@@ -82,11 +78,11 @@
     return ptr;
 }
 
-IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(const std::vector>& inputs,
-                                                                    const std::vector>& outputs) {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
-    auto ptr = std::make_shared(inputs, outputs,
-                                std::static_pointer_cast(shared_from_this()));
+IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(const std::vector>& inputs,
+                                                                 const std::vector>& outputs) {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
+    auto ptr = std::make_shared(inputs, outputs,
+                                std::static_pointer_cast(shared_from_this()));
     if (m_config.throughput_streams > 1) {
         ptr->EnableStreams();
     }
@@ -101,8 +97,8 @@
     return ptr;
 }
 
-IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequest() {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequest");
+IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequest");
     InferenceEngine::IInferRequestInternal::Ptr internalRequest;
     if (m_graphs.empty()) {
         IE_THROW(NetworkNotLoaded);
@@ -123,20 +119,20 @@
     if (!internalRequest)
         internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
     internalRequest->setPointerToExecutableNetworkInternal(shared_from_this());
-    return std::make_shared(std::static_pointer_cast(internalRequest),
-                            m_taskExecutor,
-                            m_waitExecutor,
-                            _callbackExecutor);
+    return std::make_shared(std::static_pointer_cast(internalRequest),
+                            m_taskExecutor,
+                            m_waitExecutor,
+                            _callbackExecutor);
 }
 
-std::shared_ptr CLDNNExecNetwork::GetExecGraphInfo() {
+std::shared_ptr CompiledModel::GetExecGraphInfo() {
     if (m_graphs.empty())
         IE_THROW(NetworkNotLoaded);
 
     return m_graphs.front()->GetExecGraphInfo();
 }
 
-InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name) const {
+InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) const {
     auto it = m_config.key_config_map.find(name);
     if (it != m_config.key_config_map.end()) {
         return it->second;
@@ -145,7 +141,7 @@
     }
 }
 
-InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) const {
+InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) const {
     if (name == METRIC_KEY(NETWORK_NAME)) {
         IE_ASSERT(!m_graphs.empty());
         IE_SET_METRIC_RETURN(NETWORK_NAME, m_graphs[0]->getName());
@@ -171,8 +167,10 @@
     }
 }
 
-std::shared_ptr CLDNNExecNetwork::GetContext() const {
+std::shared_ptr CompiledModel::GetContext() const {
     return m_context;
 }
 
-};  // namespace CLDNNPlugin
+}  // namespace intel_gpu
+}  // namespace runtime
+}  // namespace ov
expected: " << value << " found: " << GetIntAttr(node, attr, -1)) -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { -void CLDNNCustomLayer::LoadSingleLayer(const pugi::xml_node & node) { +void CustomLayer::LoadSingleLayer(const pugi::xml_node & node) { // Root checks CheckNodeTypeAndReturnError(node, "CustomLayer"); CheckStrAttrAndReturnError(node, "type", "SimpleGPU"); @@ -46,7 +48,7 @@ void CLDNNCustomLayer::LoadSingleLayer(const pugi::xml_node & node) { ProcessWorkSizesNode(node.child("WorkSizes")); } -void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) { +void CustomLayer::ProcessKernelNode(const pugi::xml_node & node) { CheckNodeTypeAndReturnError(node, "Kernel"); CheckAndReturnError(m_kernelSource.length() > 0, "Multiple definition of Kernel"); m_kernelEntry = GetStrAttr(node, "entry", ""); @@ -89,7 +91,7 @@ void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) { } } -void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) { +void CustomLayer::ProcessBuffersNode(const pugi::xml_node & node) { CheckNodeTypeAndReturnError(node, "Buffers"); FOREACH_CHILD(tensorNode, node, "Tensor") { KerenlParam kp; @@ -120,7 +122,7 @@ void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) { } } -void CLDNNCustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) { +void CustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) { if (node.empty()) { return; // Optional node doesn't exist } @@ -129,7 +131,7 @@ void CLDNNCustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) { m_compilerOptions = GetStrAttr(node, "options", ""); } -void CLDNNCustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) { +void CustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) { if (node.empty()) { return; // Optional node doesn't exist } @@ -180,7 +182,7 @@ void CLDNNCustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) { } } -bool CLDNNCustomLayer::IsLegalSizeRule(const std::string & rule) { +bool CustomLayer::IsLegalSizeRule(const std::string & rule) { SimpleMathExpression expr; expr.SetVariables({ { 'b', 1 }, { 'B', 1 }, @@ -200,7 +202,7 @@ bool CLDNNCustomLayer::IsLegalSizeRule(const std::string & rule) { return true; } -cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) { +cldnn::format CustomLayer::FormatFromString(const std::string & str) { static const std::map FormatNameToType = { { "BFYX" , cldnn::format::bfyx }, { "bfyx" , cldnn::format::bfyx }, @@ -224,8 +226,8 @@ cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) { return cldnn::format::format_num; } -void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLayerMap& customLayers, bool can_be_missed) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNCustomLayer::LoadFromFile"); +void CustomLayer::LoadFromFile(const std::string configFile, CustomLayerMap& customLayers, bool can_be_missed) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CustomLayer::LoadFromFile"); pugi::xml_document xmlDoc; pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str()); if (res.status != pugi::status_ok) { @@ -267,7 +269,7 @@ void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLay } for (auto r = xmlDoc.document_element(); r; r = r.next_sibling()) { - CLDNNCustomLayerPtr layer = std::make_shared(CLDNNCustomLayer(dir_path)); + CustomLayerPtr layer = std::make_shared(CustomLayer(dir_path)); layer->LoadSingleLayer(r); 
if (layer->Error()) { customLayers.clear(); @@ -278,4 +280,6 @@ void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLay } } -}; // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_config.cpp b/src/plugins/intel_gpu/src/plugin/device_config.cpp similarity index 98% rename from src/plugins/intel_gpu/src/plugin/cldnn_config.cpp rename to src/plugins/intel_gpu/src/plugin/device_config.cpp index fd3f88bdc9a..e921aca0dc9 100644 --- a/src/plugins/intel_gpu/src/plugin/cldnn_config.cpp +++ b/src/plugins/intel_gpu/src/plugin/device_config.cpp @@ -6,11 +6,11 @@ #include #include -#include "cldnn_config.h" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" #include "ie_api.h" #include "file_utils.h" -#include "cldnn_itt.h" +#include "intel_gpu/plugin/device_config.hpp" +#include "intel_gpu/plugin/itt.hpp" #include #include @@ -25,7 +25,9 @@ using namespace InferenceEngine; -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void createDirectory(std::string _path) { #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) @@ -57,7 +59,7 @@ static int getNumberOfCores(const IStreamsExecutor::Config::PreferredCoreType co IE_SUPPRESS_DEPRECATED_START void Config::UpdateFromMap(const std::map& configMap) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::UpdateFromMap"); for (auto& kvp : configMap) { std::string key = kvp.first; std::string val = kvp.second; @@ -199,7 +201,7 @@ void Config::UpdateFromMap(const std::map& configMap) std::istream_iterator end; std::vector configFiles(begin, end); for (auto& file : configFiles) { - CLDNNCustomLayer::LoadFromFile(file, customLayers); + CustomLayer::LoadFromFile(file, customLayers); } } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) { if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) { @@ -329,7 +331,7 @@ void Config::UpdateFromMap(const std::map& configMap) } void Config::adjustKeyMapValues() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::AdjustKeyMapValues"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::AdjustKeyMapValues"); if (useProfiling) key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES; else @@ -465,4 +467,6 @@ Config& Configs::GetDefaultDeviceConfig() { IE_SUPPRESS_DEPRECATED_END -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_graph.cpp b/src/plugins/intel_gpu/src/plugin/graph.cpp similarity index 94% rename from src/plugins/intel_gpu/src/plugin/cldnn_graph.cpp rename to src/plugins/intel_gpu/src/plugin/graph.cpp index 7c64fe6107d..4b67bae7082 100644 --- a/src/plugins/intel_gpu/src/plugin/cldnn_graph.cpp +++ b/src/plugins/intel_gpu/src/plugin/graph.cpp @@ -6,10 +6,11 @@ #include "intel_gpu/runtime/profiling.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" -#include "cldnn_graph.h" -#include "simple_math.h" +#include "intel_gpu/plugin/graph.hpp" +#include "intel_gpu/plugin/simple_math.hpp" #include -#include "cldnn_infer_request.h" +#include "intel_gpu/plugin/infer_request.hpp" +#include "intel_gpu/plugin/itt.hpp" #include #include @@ -33,14 +34,15 @@ #include #include #include -#include "cldnn_itt.h" using namespace InferenceEngine; using namespace InferenceEngine::details; -namespace CLDNNPlugin 
{ +namespace ov { +namespace runtime { +namespace intel_gpu { -CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id) +Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id) : m_context(context) , m_networkName(network.getName()) , m_config(config) @@ -50,7 +52,7 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr Build(); } -CLDNNGraph::CLDNNGraph(std::shared_ptr graph, uint16_t stream_id) +Graph::Graph(std::shared_ptr graph, uint16_t stream_id) : m_context(graph->m_context) , m_program(graph->m_program) , m_networkName(graph->m_networkName) @@ -60,8 +62,8 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr graph, uint16_t stream_id) Build(); } -void CLDNNGraph::UpdateLayersMaps() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps"); +void Graph::UpdateLayersMaps() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdateLayersMaps"); primitiveIDs = m_program->primitiveIDs; prevPrimitiveIDs = m_program->prevPrimitiveIDs; profilingIDs = m_program->profilingIDs; @@ -69,8 +71,8 @@ void CLDNNGraph::UpdateLayersMaps() { outputDims = m_program->outputDims; } -void CLDNNGraph::Build() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::Build"); +void Graph::Build() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::Build"); UpdateLayersMaps(); if (GetMaxDynamicBatchSize() > 1) { @@ -92,13 +94,13 @@ void CLDNNGraph::Build() { } } -bool CLDNNGraph::use_external_queue() const { +bool Graph::use_external_queue() const { auto impl = getContextImpl(m_context); return impl->GetExternalQueue() != nullptr; } -std::shared_ptr CLDNNGraph::BuildNetwork(std::shared_ptr program) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork"); +std::shared_ptr Graph::BuildNetwork(std::shared_ptr program) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::BuildNetwork"); std::shared_ptr network = nullptr; auto impl = getContextImpl(m_context); @@ -129,9 +131,9 @@ std::shared_ptr CLDNNGraph::BuildNetwork(std::shared_ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector& primitives_info, - bool filter_const_primitives) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetExecGraphInfoByPrimitivesInfo"); +std::shared_ptr Graph::GetExecGraphInfoByPrimitivesInfo(std::vector& primitives_info, + bool filter_const_primitives) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo"); if (m_config.useProfiling) { try { // Update may throw an exception for step-by-step runtime graph dump, @@ -289,7 +291,7 @@ std::shared_ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s auto desc_from_layout = [&](cldnn::layout layout) -> TensorDesc { Precision precision = data_type_to_precision(layout.data_type); SizeVector dims; - Layout l = Layout::NCHW; + auto l = InferenceEngine::Layout::NCHW; auto size = layout.size; if (layout.format.dimension() == 4) { dims = {static_cast(size.batch[0]), @@ -302,7 +304,7 @@ std::shared_ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s static_cast(size.spatial[2]), static_cast(size.spatial[1]), static_cast(size.spatial[0])}; - l = Layout::NCDHW; + l = InferenceEngine::Layout::NCDHW; } else if (layout.format.dimension() == 6) { dims = {static_cast(size.batch[0]), static_cast(size.feature[0]), @@ -311,7 +313,7 @@ std::shared_ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s 
static_cast<size_t>(size.spatial[1]),
                 static_cast<size_t>(size.spatial[0])};
         // Should be NC?DHW but there is no such layout yet
-        l = Layout::BLOCKED;
+        l = InferenceEngine::Layout::BLOCKED;
     }
     TensorDesc dst{precision, dims, l};
     return dst;
 };
@@ -465,14 +467,14 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
     return std::make_shared<ngraph::Function>(results, params, "runtime_gpu_graph");
 }
 
-std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfo() {
+std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfo() {
     auto primitives_info = GetNetwork()->get_primitives_info();
 
     return GetExecGraphInfoByPrimitivesInfo(primitives_info, true);
 }
 
-void CLDNNGraph::UpdatePerfStatistics() {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdatePerfStatistics");
+void Graph::UpdatePerfStatistics() {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdatePerfStatistics");
     if (GetNetworksCount() == 0) {
         return;
     }
@@ -539,12 +541,12 @@ void CLDNNGraph::UpdatePerfStatistics() {
     }
 }
 
-bool CLDNNGraph::IsLoaded() const {
+bool Graph::IsLoaded() const {
     return GetNetwork() != nullptr;
 }
 
-std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::GetPerformanceCounts() const {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetPerformanceCounts");
+std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> Graph::GetPerformanceCounts() const {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetPerformanceCounts");
     std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> result;
     bool combinePrimByIRLayers = false;
     unsigned i = 0;
@@ -624,7 +626,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
         return true;
     };
 
-    // Step 1. Get all primitives in execution order which was added by clDNNPlugin
+    // Step 1. Get all primitives in execution order which were added by the GPU plugin
     for (auto& primId : profilingIDs) {
         getFromProfiling(primId);
     }
@@ -693,7 +695,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
         }
     }
 
-    // Step 3. Checking primitives which has been deleted from execution order but added by clDNNPlugin
+    // Step 3. Check primitives which have been deleted from the execution order but were added by the GPU plugin
     for (auto& primId : profilingIDs) {
         if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
             getFromProfiling(primId);
@@ -718,7 +720,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
     return result;
 }
 
-std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
+std::shared_ptr<cldnn::network> Graph::GetNetwork(size_t idx) const {
     if (idx >= GetNetworksCount())
         IE_THROW() << "Unable to find network with id=" << idx << ". 
Stored networks count: " << GetNetworksCount(); @@ -726,7 +728,7 @@ std::shared_ptr CLDNNGraph::GetNetwork(size_t idx) const { } -std::string CLDNNGraph::MapOutputName(std::string outName) const { +std::string Graph::MapOutputName(std::string outName) const { auto networkOutputsIDs = GetNetwork()->get_output_ids(); auto allPrimitiveIds = GetNetwork()->get_all_primitives(); @@ -751,7 +753,7 @@ std::string CLDNNGraph::MapOutputName(std::string outName) const { return outputID; } -InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const { +InferenceEngine::SizeVector Graph::GetOutputSize(std::string outName) const { auto res_output = outputDims.find(outName); InferenceEngine::SizeVector sz; @@ -763,4 +765,6 @@ InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const return sz; } -}; // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/infer_request.cpp similarity index 88% rename from src/plugins/intel_gpu/src/plugin/cldnn_infer_request.cpp rename to src/plugins/intel_gpu/src/plugin/infer_request.cpp index eb0900d9b66..cf5fa11a1ee 100644 --- a/src/plugins/intel_gpu/src/plugin/cldnn_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/infer_request.cpp @@ -8,10 +8,10 @@ #include #include #include -#include "cldnn_infer_request.h" -#include "cldnn_remote_context.h" -#include "cldnn_executable_network.h" -#include "cldnn_itt.h" +#include "intel_gpu/plugin/infer_request.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "intel_gpu/plugin/compiled_model.hpp" +#include "intel_gpu/plugin/itt.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include #include @@ -47,7 +47,7 @@ void copyToFloat(float* dst, const InferenceEngine::Blob* src) { } template -void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, CLDNNPlugin::buf_info* bi, cldnn::stream& stream) { +void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, ov::runtime::intel_gpu::buf_info* bi, cldnn::stream& stream) { size_t n = (bi == nullptr) ? dst->size() : bi->buf_size; size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; @@ -89,7 +89,7 @@ inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) { if (!blob->is()) { not_allocated = (blob->buffer() == nullptr); } else { - not_allocated = !CLDNNPlugin::getBlobImpl(blob->as())->is_allocated(); + not_allocated = !ov::runtime::intel_gpu::getBlobImpl(blob->as())->is_allocated(); } if (not_allocated) { IE_THROW(NotAllocated) << err_str; @@ -173,14 +173,16 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) { } // namespace -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { // ----------------------------------------------------------------------------------------- // // ---------------------------- IE API impl ------------------------------------------------ // // ----------------------------------------------------------------------------------------- // -Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob"); +Blob::Ptr InferRequest::GetBlob(const std::string& name) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetBlob"); Blob::Ptr data; InputInfo::Ptr foundInput; DataPtr foundOutput; @@ -202,8 +204,8 @@ Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) { return data; } -void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob"); +void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBlob"); // perform all common checks first if (name.empty()) { @@ -339,8 +341,8 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) } } -void CLDNNInferRequest::checkBlobs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs"); +void InferRequest::checkBlobs() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::checkBlobs"); for (auto const &input : _inputs) { InputInfo::Ptr foundInput = nullptr; auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), @@ -369,8 +371,8 @@ void CLDNNInferRequest::checkBlobs() { } } -void CLDNNInferRequest::SetGraph(std::shared_ptr graph) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetGraph"); +void InferRequest::SetGraph(std::shared_ptr graph) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetGraph"); m_graph = graph; if (m_graph == nullptr) { @@ -387,8 +389,8 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr graph) } } -void CLDNNInferRequest::SetBatch(int new_batch) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBatch"); +void InferRequest::SetBatch(int new_batch) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBatch"); if (m_graph->GetMaxDynamicBatchSize() < 0) IE_THROW() << "Dynamic batch is not enabled."; @@ -456,16 +458,16 @@ void CLDNNInferRequest::SetBatch(int new_batch) { m_curBatch = new_batch; } -CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs, - const CLDNNExecNetwork::Ptr& execNetwork) +InferRequest::InferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs, + const CompiledModel::Ptr& execNetwork) : IInferRequestInternal(networkInputs, networkOutputs) { IE_ASSERT(nullptr != execNetwork); streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); } 
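// Aside (illustrative, not part of the patch): every OV_ITT_SCOPED_TASK above
// follows one substitution: the profiling domain CLDNNPlugin becomes
// intel_gpu_plugin and the task name drops the CLDNN prefix. A minimal sketch
// with OpenVINO's ITT macros; the domain is presumably declared once in the
// new intel_gpu/plugin/itt.hpp, and Example() is a hypothetical function.
#include <openvino/itt.hpp>

namespace ov {
namespace runtime {
namespace intel_gpu {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(intel_gpu_plugin);
}  // namespace domains
}  // namespace itt

void Example() {
    // Old form: OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExample::run");
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Example::run");
}
}  // namespace intel_gpu
}  // namespace runtime
}  // namespace ov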
-CLDNNInferRequest::CLDNNInferRequest(const std::vector>& inputs, +InferRequest::InferRequest(const std::vector>& inputs, const std::vector>& outputs, - const CLDNNExecNetwork::Ptr& execNetwork) + const CompiledModel::Ptr& execNetwork) : IInferRequestInternal(inputs, outputs) { IE_ASSERT(nullptr != execNetwork); streamExecutor = dynamic_cast(execNetwork->m_taskExecutor.get()); @@ -474,17 +476,17 @@ CLDNNInferRequest::CLDNNInferRequest(const std::vectorwait(CLDNNGraph::Stage::PREPROC); +void InferRequest::preprocess_notify() { + m_graph->wait(Graph::Stage::PREPROC); if (m_graph->GetMaxDynamicBatchSize() > 1) { preprocess_dynamic(); } else { execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP } - m_graph->notify(CLDNNGraph::Stage::PREPROC); + m_graph->notify(Graph::Stage::PREPROC); } -void CLDNNInferRequest::preprocess() { +void InferRequest::preprocess() { if (m_graph->GetMaxDynamicBatchSize() > 1) { preprocess_dynamic(); } else { @@ -492,12 +494,12 @@ void CLDNNInferRequest::preprocess() { } } -void CLDNNInferRequest::enqueue_notify() { - m_graph->wait(CLDNNGraph::Stage::EXECUTE); +void InferRequest::enqueue_notify() { + m_graph->wait(Graph::Stage::EXECUTE); enqueue(); } -void CLDNNInferRequest::enqueue() { +void InferRequest::enqueue() { if (m_graph->GetMaxDynamicBatchSize() > 1) { enqueue_dynamic(); return; @@ -546,12 +548,12 @@ void CLDNNInferRequest::enqueue() { internal_outputs = m_graph->GetNetwork()->execute(dependencies); } -void CLDNNInferRequest::wait_notify() { +void InferRequest::wait_notify() { wait(); - m_graph->notify(CLDNNGraph::Stage::EXECUTE); + m_graph->notify(Graph::Stage::EXECUTE); } -void CLDNNInferRequest::wait() { +void InferRequest::wait() { if (m_graph->GetMaxDynamicBatchSize() > 1) { wait_dynamic(); return; @@ -588,12 +590,12 @@ void CLDNNInferRequest::wait() { } } -void CLDNNInferRequest::preprocess_dynamic() { +void InferRequest::preprocess_dynamic() { // execute input pre-processing. 
execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP } -void CLDNNInferRequest::enqueue_dynamic() { +void InferRequest::enqueue_dynamic() { internal_outputs_dynamic.clear(); auto numNets = m_graph->GetNetworksCount(); internal_outputs_dynamic.resize(numNets); @@ -616,7 +618,7 @@ void CLDNNInferRequest::enqueue_dynamic() { } } -void CLDNNInferRequest::wait_dynamic() { +void InferRequest::wait_dynamic() { if (internal_outputs_dynamic.empty()) { IE_THROW() << "Inference was not started!\n"; } @@ -640,9 +642,9 @@ void CLDNNInferRequest::wait_dynamic() { // ----------------------------------------------------------------------------------------- // // ---------------------------- internal utils --------- ----------------------------------- // // ----------------------------------------------------------------------------------------- // -void CLDNNInferRequest::setup_stream_graph() { +void InferRequest::setup_stream_graph() { int streamID = 0; - auto& streamGraphs = static_cast(_exeNetwork.get())->m_graphs; + auto& streamGraphs = static_cast(_exeNetwork.get())->m_graphs; if (nullptr != streamExecutor) { streamID = streamExecutor->GetStreamId(); int numGraphs = streamGraphs.size(); @@ -651,8 +653,8 @@ void CLDNNInferRequest::setup_stream_graph() { m_graph = streamGraphs[streamID]; } -Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_host_blob"); +Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::create_host_blob"); const Precision& p = desc.getPrecision(); switch (p) { @@ -706,8 +708,8 @@ Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* m } } -void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_output_data"); +void InferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_output_data"); auto& stream = m_graph->GetNetwork()->get_stream(); switch (dst->getTensorDesc().getPrecision()) { case Precision::FP32: copyResultToOutputBlob(src, dst, bi, stream); break; @@ -720,11 +722,11 @@ void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, } } -void CLDNNInferRequest::copy_input_data(std::shared_ptr network, +void InferRequest::copy_input_data(std::shared_ptr network, const cldnn::primitive_id &inputName, const cldnn::layout& inputLayout, const Blob &inputBlob, buf_info* bi) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_input_data"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_input_data"); size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; @@ -771,7 +773,7 @@ void CLDNNInferRequest::copy_input_data(std::shared_ptr network, } } -Blob::Ptr CLDNNInferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) { +Blob::Ptr InferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) { uint8_t* bufferMem = nullptr; auto clblobPtr = std::dynamic_pointer_cast(blobPtr); if (clblobPtr) { @@ -786,8 +788,8 @@ Blob::Ptr CLDNNInferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) { return hostBlob; } -void CLDNNInferRequest::allocate_inputs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs"); +void InferRequest::allocate_inputs() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs"); auto inputLayouts = m_graph->GetInputLayouts(); // allocate inputs for (auto& ni : _networkInputs) { @@ -823,8 +825,8 @@ void CLDNNInferRequest::allocate_inputs() { } } -void CLDNNInferRequest::allocate_inputs_dynamic() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs_dynamic"); +void InferRequest::allocate_inputs_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic"); // allocate inputs for (auto &input : m_graph->GetInputLayouts()) { InputInfo::Ptr ni = _networkInputs.at(input.first); @@ -849,8 +851,8 @@ void CLDNNInferRequest::allocate_inputs_dynamic() { } } -void CLDNNInferRequest::allocate_outputs() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs"); +void InferRequest::allocate_outputs() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs"); // allocate outputs for (auto& no : _networkOutputs) { std::string outputID = m_graph->MapOutputName(no.first); @@ -868,8 +870,8 @@ void CLDNNInferRequest::allocate_outputs() { } } -void CLDNNInferRequest::allocate_outputs_dynamic() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs_dynamic"); +void InferRequest::allocate_outputs_dynamic() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic"); // allocate outputs for (auto& no : _networkOutputs) { std::string outputID = m_graph->MapOutputName(no.first); @@ -890,8 +892,8 @@ void CLDNNInferRequest::allocate_outputs_dynamic() { } } -void CLDNNInferRequest::InferImpl() { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::InferImpl"); +void InferRequest::InferImpl() { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::InferImpl"); setup_stream_graph(); std::lock_guard lk(m_graph->get_mutex()); preprocess(); @@ -899,8 +901,8 @@ void CLDNNInferRequest::InferImpl() { wait(); } -std::map CLDNNInferRequest::GetPerformanceCounts() const { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetPerformanceCounts"); +std::map InferRequest::GetPerformanceCounts() const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetPerformanceCounts"); if (!m_useProfiling) { IE_THROW() << "Performance counters were not enabled"; } else { @@ -908,9 +910,9 @@ std::map CLDNNInferRequest::GetPerforma } } -void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob, +void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob, std::vector& dependencies) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_input"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_input"); auto 
inputLayoutItr = m_graph->GetInputLayouts().find(inputName);
     if (inputLayoutItr == m_graph->GetInputLayouts().end()) {
         IE_THROW() << "Input name mismatch.";
     }
@@ -943,7 +945,7 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
     if (!is_dev_input) {
         if (prec == Precision::I16 || prec == Precision::U16) {
-            // clDNN doesn't support I16 input precision,
+            // GPU plugin doesn't support I16 input precision,
             // so have to convert input data to fp32 precision
             cldnn::mem_lock<float> ptr{ inputMem, stream };
             if (prec == Precision::I16) {
@@ -968,8 +970,8 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
     }
 }
 
-void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_output");
+void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_output");
     Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
     cldnn::primitive_id internalName = outputsMap[outputName];
     auto _nw_ptr = m_graph->GetNetwork();
@@ -985,26 +987,28 @@ void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Bl
     _nw_ptr->set_output_memory(internalName, outputMem);
 }
 
-InferenceEngine::Blob::Ptr CLDNNInferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
+InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
     if (m_graph->GetEngine()->use_unified_shared_memory()) {
-        auto blobPtr = std::make_shared<CLDNNRemoteUSMbuffer>(m_graph->GetContext(),
-                                                              m_graph->GetNetwork()->get_stream(),
-                                                              desc,
-                                                              layout,
-                                                              nullptr,
-                                                              0,
-                                                              0,
-                                                              CLDNNRemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
+        auto blobPtr = std::make_shared<RemoteUSMbuffer>(m_graph->GetContext(),
+                                                         m_graph->GetNetwork()->get_stream(),
+                                                         desc,
+                                                         layout,
+                                                         nullptr,
+                                                         0,
+                                                         0,
+                                                         RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
         getBlobImpl(blobPtr.get())->allocate();
         return blobPtr;
     } else {
-        auto blobPtr = std::make_shared<CLDNNRemoteCLbuffer>(m_graph->GetContext(),
-                                                             m_graph->GetNetwork()->get_stream(),
-                                                             desc,
-                                                             layout);
+        auto blobPtr = std::make_shared<RemoteCLbuffer>(m_graph->GetContext(),
+                                                        m_graph->GetNetwork()->get_stream(),
+                                                        desc,
+                                                        layout);
         getBlobImpl(blobPtr.get())->allocate();
         return blobPtr;
     }
 }
 
-} // namespace CLDNNPlugin
+} // namespace intel_gpu
+} // namespace runtime
+} // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp b/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp
index 83a5b3f6903..915236f59dc 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/batch_to_space.cpp
@@ -2,15 +2,17 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "cldnn_program.h"
-#include "cldnn_common_utils.h"
+#include "intel_gpu/plugin/program.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
 
 #include "ngraph/op/batch_to_space.hpp"
 #include "ngraph/op/constant.hpp"
 
 #include "intel_gpu/primitives/batch_to_space.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::BatchToSpace>& op) {
     p.ValidateInputs(op, {4});
@@ -35,7 +37,7 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph
-    auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
+    auto out_size = tensor_from_dims(op->get_output_shape(0));
 
     auto batchToSpacePrim = cldnn::batch_to_space(layerName,
                                                   inputPrimitives[0],
// input @@ -51,4 +53,6 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr& op, const ngraph::AxisSet axis_mapping) { auto inputPrimitives = p.GetInputPrimitiveIDs(op); @@ -69,7 +71,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptrget_friendly_name()); p.AddPrimitive(reshapePrim); @@ -80,7 +82,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), {}, op->get_friendly_name()); @@ -119,4 +121,6 @@ static void CreateBroadcastOp(Program& p, const std::shared_ptr= 0 ? axis : axis + static_cast(rank); if (cldnn_axis >= rank) IE_THROW() << "Concatenation axis exceeds number of dimensions"; - // Difference in dimension ordering between IE and clDNN, + // Difference in dimension ordering between IE and GPU plugin, // reverse spatial dimensions after batch and feature. if (cldnn_axis >= 2) { auto spatial_axis = cldnn_axis - 2; @@ -54,4 +56,6 @@ static void CreateConcatOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {2}); @@ -52,4 +54,6 @@ static void CreateConvertOp(Program& p, const std::shared_ptrget_friendly_name()); @@ -111,7 +113,7 @@ static void CreateConvolutionOp(Program& p, const std::shared_ptrget_friendly_name()); @@ -168,7 +170,7 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptrget_output_tensor(0).get_shape()), + tensor_from_dims(op->get_output_tensor(0).get_shape()), weights_have_group_dim, op->get_friendly_name()); @@ -225,7 +227,7 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_p params.groups, params.stride, params.padding, - CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()), + tensor_from_dims(op->get_output_tensor(0).get_shape()), weights_have_group_dim, op->get_friendly_name()); @@ -272,7 +274,7 @@ static void DeformableConvolutionImpl(Program& p, params.stride, params.padding, params.dilation, - CldnnTensorFromIEDims(outDims), + tensor_from_dims(outDims), kernel, bilinearInterpolationPad, op->get_friendly_name()); @@ -283,7 +285,7 @@ static void DeformableConvolutionImpl(Program& p, weights, {}, params.groups, - CldnnTensorFromIEDims(outDims), + tensor_from_dims(outDims), op->get_friendly_name()); p.AddPrimitive(defConvPrim); p.AddPrimitiveToProfiler(defConvLayerNameConv, op); @@ -297,7 +299,7 @@ static void DeformableConvolutionImpl(Program& p, params.stride, params.padding, params.dilation, - CldnnTensorFromIEDims(outDims), + tensor_from_dims(outDims), bilinearInterpolationPad, op->get_friendly_name()); @@ -334,7 +336,7 @@ static void CreateBinaryConvolutionOp(Program& p, const std::shared_ptrget_pad_value(), calc_precision, @@ -352,4 +354,6 @@ REGISTER_FACTORY_IMPL(v1, DeformableConvolution); REGISTER_FACTORY_IMPL(v8, DeformableConvolution); REGISTER_FACTORY_IMPL(v1, BinaryConvolution); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp b/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp index 8f07a157d3e..740112971de 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/ctc_greedy_decoder.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/ctc_greedy_decoder.hpp" #include 
"ngraph/op/ctc_greedy_decoder_seq_len.hpp" @@ -15,7 +15,9 @@ #include "transformations/utils/utils.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr& op, bool ctc_merge_repeated) { p.ValidateInputs(op, {2, 3}); @@ -27,7 +29,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_input_element_type(portIndex)); if (inputDataType == cldnn::data_types::i64) { - // clDNN primitive supports only i32 data type for 'sequence_length' and 'blank_index' inputs + // GPU primitive supports only i32 data type for 'sequence_length' and 'blank_index' inputs // so we need additional reorder if it's provided as i64 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -72,7 +74,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(1).size()), - CldnnTensorFromIEDims(op->get_output_shape(1))); + tensor_from_dims(op->get_output_shape(1))); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->verbose >= 2) { @@ -95,10 +97,10 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), op->get_friendly_name()); - // clDNN primitive supports only i32 as output data type + // GPU primitive supports only i32 as output data type primitive.output_data_type = DataTypeFromPrecision(ngraph::element::i32); if (num_output == 2) { @@ -131,4 +133,6 @@ static void CreateCTCGreedyDecoderSeqLenOp(Program& p, const std::shared_ptr= rank) IE_THROW() << "CumSum axis is not correspond to number of dimensions"; - // Difference in dimension ordering between IE and clDNN, + // Difference in dimension ordering between IE and GPU plugin, // reverse spatial dimensions after batch and feature. uint32_t cldnn_axis = axis; if (axis >= 2) { @@ -72,4 +74,6 @@ static void CreateCumSumOp(Program& p, const std::shared_ptr static inline std::string vecToString(std::vector vec) { @@ -100,7 +102,7 @@ protected: std::map m_values; }; -void CreateCustomOp(Program& p, const std::shared_ptr& op, CLDNNCustomLayerPtr customLayer) { +void CreateCustomOp(Program& p, const std::shared_ptr& op, CustomLayerPtr customLayer) { auto inputPrimitives = p.GetInputPrimitiveIDs(op); std::string layerName = layer_type_name_ID(op); @@ -130,7 +132,7 @@ void CreateCustomOp(Program& p, const std::shared_ptr& op, CLDNNCu cldnn::format outputFormat(cldnn::format::any); for (const auto& param : customLayer->KernelParams()) { switch (param.type) { - case CLDNNCustomLayer::ParamType::Input: { + case CustomLayer::ParamType::Input: { kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input; kernelParameters[param.paramIndex].index = @@ -159,7 +161,7 @@ void CreateCustomOp(Program& p, const std::shared_ptr& op, CLDNNCu } break; } - case CLDNNCustomLayer::ParamType::Output: { + case CustomLayer::ParamType::Output: { kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? 
kernelParameters.size() : size_t(param.paramIndex + 1)); kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_output; kernelParameters[param.paramIndex].index = @@ -255,4 +257,6 @@ void CreateCustomOp(Program& p, const std::shared_ptr& op, CLDNNCu p.primitiveIDs[genericLayerName] = prevLayerName; } -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/depth_to_space.cpp b/src/plugins/intel_gpu/src/plugin/ops/depth_to_space.cpp index ec3ac592ac1..c5beb34f490 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/depth_to_space.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/depth_to_space.cpp @@ -2,14 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/depth_to_space.hpp" #include "intel_gpu/primitives/depth_to_space.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static cldnn::depth_to_space_mode GetDepthMode(ngraph::op::v0::DepthToSpace::DepthToSpaceMode mode) { switch (mode) { @@ -42,4 +44,6 @@ static void CreateDepthToSpaceOp(Program& p, const std::shared_ptr CodeNameToType = { @@ -84,4 +86,6 @@ static void CreateDetectionOutputOp(Program& p, const std::shared_ptr& op, cldnn::eltwise_mode mode) { auto inputPrimitives = p.GetInputPrimitiveIDs(op); @@ -65,7 +67,7 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr& op, cl // Extend input dimensions by prepending ones inputShape.insert(inputShape.begin(), outRank - inputRank, 1ul); - auto targetShape = CldnnTensorFromIEDims(inputShape); + auto targetShape = tensor_from_dims(inputShape); auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name()); p.AddPrimitive(reshapePrim); @@ -194,4 +196,6 @@ REGISTER_FACTORY_IMPL(v1, Power); REGISTER_FACTORY_IMPL(v1, FloorMod); REGISTER_FACTORY_IMPL(v1, Mod); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/embedding_bag.cpp b/src/plugins/intel_gpu/src/plugin/ops/embedding_bag.cpp index f7884822990..30f50ef5d05 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/embedding_bag.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/embedding_bag.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/embedding_segments_sum.hpp" #include "ngraph/op/embeddingbag_offsets_sum.hpp" @@ -14,7 +14,9 @@ #include "transformations/utils/utils.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {3, 4, 5}); @@ -42,7 +44,7 @@ static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptrget_input_element_type(portIndex)); if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) { - // clDNN primitive supports only i32 data type for indices inputs, + // GPU primitive supports only i32 data type for indices inputs, // so we need additional reorders if they are provided as i64 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = 
DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -64,7 +66,7 @@ static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), defaultIndex, op->get_friendly_name()); @@ -83,7 +85,7 @@ static void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptrget_input_element_type(portIndex)); if ((portIndex == 1) && (inputDataType == cldnn::data_types::i64)) { - // clDNN primitive supports only i32 data type for indices input, + // GPU primitive supports only i32 data type for indices input, // so we need additional reorder if it's provided as i64 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -105,7 +107,7 @@ static void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), -1, op->get_friendly_name()); @@ -142,7 +144,7 @@ static void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptrget_input_element_type(portIndex)); if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) { - // clDNN primitive supports only i32 data type for indices inputs, + // GPU primitive supports only i32 data type for indices inputs, // so we need additional reorders if they are provided as i64 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -164,7 +166,7 @@ static void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), defaultIndex, op->get_friendly_name()); @@ -176,4 +178,6 @@ REGISTER_FACTORY_IMPL(v3, EmbeddingBagOffsetsSum); REGISTER_FACTORY_IMPL(v3, EmbeddingBagPackedSum); REGISTER_FACTORY_IMPL(v3, EmbeddingSegmentsSum); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp index a4d9b67e10c..c60d87c6590 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/experimental_detectron_roi_feature_extractor.cpp @@ -2,15 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/experimental_detectron_roi_feature.hpp" #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/primitives/experimental_detectron_roi_feature_extractor.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const std::shared_ptr& op) { auto inputPrimitives = p.GetInputPrimitiveIDs(op); @@ -19,7 +21,7 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s cldnn::layout mutableLayout = cldnn::layout( DataTypeFromPrecision(op->get_output_element_type(1)), DefaultFormatForDims(op->get_output_shape(1).size()), - CldnnTensorFromIEDims(op->get_output_shape(1))); + tensor_from_dims(op->get_output_shape(1))); cldnn::memory::ptr shared_memory 
{p.GetEngine().allocate_memory(mutableLayout)}; @@ -54,4 +56,6 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s REGISTER_FACTORY_IMPL(v6, ExperimentalDetectronROIFeatureExtractor); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/extract_image_patches.cpp b/src/plugins/intel_gpu/src/plugin/ops/extract_image_patches.cpp index 7dcd855b316..74b72db050e 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/extract_image_patches.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/extract_image_patches.cpp @@ -2,14 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/extractimagepatches.hpp" #include "intel_gpu/primitives/extract_image_patches.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static inline std::string PadToString(ngraph::op::PadType pad) { switch (pad) { @@ -38,7 +40,7 @@ static void CreateExtractImagePatchesOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), op->get_friendly_name()); p.AddPrimitive(extractImagePatchesPrim); @@ -47,4 +49,6 @@ static void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {5}); @@ -40,4 +42,6 @@ static void CreateFakeQuantizeOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {4}); @@ -23,7 +25,7 @@ static void CreateGatherTreeOp(Program& p, const std::shared_ptrget_input_element_type(portIndex)); if (inputDataType == cldnn::data_types::i64) { - // clDNN primitive does not support i64 inputs, + // GPU primitive does not support i64 inputs, // so we need additional reorders to convert them to i32 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -55,4 +57,6 @@ static void CreateGatherTreeOp(Program& p, const std::shared_ptr& op, const int64_t for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) { auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex)); if (inputDataType == cldnn::data_types::i64) { - // clDNN primitive does not support i64 inputs, + // GPU primitive does not support i64 inputs, // so we need additional reorders to convert them to i32 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -95,7 +97,7 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr& op, const int64_t reorderedInputs[1], GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())), outLayout, - CldnnTensorFromIEDims(op->get_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), batch_dim, support_neg_ind, op->get_friendly_name()); @@ -125,4 +127,6 @@ static void CreateGatherOp(Program& p, const std::shared_ptr= rank) IE_THROW() << "GatherElements axis is not correspond to number of dimensions"; - // Difference in dimension ordering between IE and clDNN, + // Difference in dimension ordering between IE and GPU plugin, // reverse spatial dimensions after batch and feature. 
unsigned cldnn_axis = axis; if (axis >= 2) { @@ -54,7 +56,7 @@ static void CreateGatherElementsOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), GetGatherAxis(axis, rank), op->get_friendly_name()); @@ -64,4 +66,6 @@ static void CreateGatherElementsOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {2}); @@ -62,4 +64,6 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1}); @@ -28,4 +30,6 @@ static void CreateGRNOp(Program& p, const std::shared_ptr& REGISTER_FACTORY_IMPL(v0, GRN); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/interpolate.cpp b/src/plugins/intel_gpu/src/plugin/ops/interpolate.cpp index d4ae76d48dc..e22045f324a 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/interpolate.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/interpolate.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "caseless.hpp" #include "ngraph/op/interpolate.hpp" @@ -11,7 +11,9 @@ #include "intel_gpu/primitives/resample.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) { switch (mode) { @@ -71,7 +73,7 @@ static cldnn::resample::resample_axis GetInterpolationAxis(int32_t axis, uint32_ if (axis < 0 || axis >= sz) IE_THROW() << "Interpolate axis is not correspond to number of dimensions"; - // Difference in dimension ordering between IE and clDNN, + // Difference in dimension ordering between IE and GPU plugin, // reverse spatial dimensions after batch and feature. 
uint32_t cldnn_axis = axis; if (axis >= 2) { @@ -111,7 +113,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptrget_attrs(); auto inputRank = op->get_input_shape(0).size(); auto outDims = op->get_output_shape(0).size(); - auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0)); + auto outTensor = tensor_from_dims(op->get_output_shape(0)); std::vector pad_begin(attrs.pads_begin.begin(), attrs.pads_begin.end()); std::vector pad_end(attrs.pads_end.begin(), attrs.pads_end.end()); @@ -202,4 +204,6 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr @@ -26,7 +26,9 @@ using Loop = ngraph::op::v5::Loop; -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { template static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) { @@ -41,7 +43,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha const int32_t output_idx) { const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx)); const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size()); - const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); + const auto tensor = tensor_from_dims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); auto mem = p.GetEngine().allocate_memory(output_layout); auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency @@ -224,4 +226,6 @@ static void CreateLoopOp(Program& p, const std::shared_ptr& op) { REGISTER_FACTORY_IMPL(v5, Loop); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/lrn.cpp b/src/plugins/intel_gpu/src/plugin/ops/lrn.cpp index e6000881a69..b9bf0b80c41 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/lrn.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/lrn.cpp @@ -2,15 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/lrn.hpp" #include "ngraph/op/constant.hpp" #include "intel_gpu/primitives/lrn.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static cldnn::lrn_norm_region GetNormRegion(std::vector axis_value) { if (axis_value.size() == 1 && axis_value[0] == 1) { @@ -47,4 +49,6 @@ static void CreateLRNOp(Program& p, const std::shared_ptr& REGISTER_FACTORY_IMPL(v0, LRN); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp b/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp index 66ef9d5b80f..e3eab2c6cc8 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/matmul.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/matmul.hpp" #include "ngraph/op/constant.hpp" @@ -15,7 +15,9 @@ #include "intel_gpu/primitives/reorder.hpp" #include "intel_gpu/primitives/permute.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { /* * get_aligned_shapes function align two input shapes to have the same size and @@ 
-128,7 +130,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptrget_friendly_name() + suffix; auto reshapeInPrim = cldnn::reshape(reshapeInName, inputName, - CldnnTensorFromIEDims(reshapeSize), + tensor_from_dims(reshapeSize), op->get_friendly_name()); p.AddPrimitive(reshapeInPrim); p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op); @@ -157,7 +159,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptrget_output_shape(0)); + auto outputShape = tensor_from_dims(op->get_output_shape(0)); auto outReshapeName = layerName + "_cldnn_out_reshape"; auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name()); @@ -269,7 +271,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptrget_friendly_name()); @@ -285,4 +287,6 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateCommonMVNOp(Program& p, const std::shared_ptr& op, bool across_channels, bool normalize_variance, float eps, bool eps_inside_sqrt = true) { @@ -65,4 +67,6 @@ static void CreateMVNOp(Program& p, const std::shared_ptr& REGISTER_FACTORY_IMPL(v0, MVN); REGISTER_FACTORY_IMPL(v6, MVN); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp index d5889d03daf..a72658c1c49 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/non_max_suppression.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/non_max_suppression.hpp" #include @@ -14,7 +14,9 @@ #include "intel_gpu/primitives/non_max_suppression.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static bool GetCenterPointBox(ngraph::op::v5::NonMaxSuppression::BoxEncodingType encoding) { switch (encoding) { @@ -35,7 +37,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) { auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex)); if ((portIndex == 2) && (inputDataType == cldnn::data_types::i64)) { - // clDNN primitive supports only i32 data type for 'max_output_boxes_per_class' input + // GPU primitive supports only i32 data type for 'max_output_boxes_per_class' input // so we need additional reorder if it's provided as i64 auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag; auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size()); @@ -54,7 +56,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt } } - // clDNN primitive supports only i32 as output data type + // GPU primitive supports only i32 as output data type auto out_type = op->get_output_element_type(0); if (out_type == ngraph::element::i64) { out_type = ngraph::element::i32; @@ -77,7 +79,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt cldnn::layout mutableLayoutSecond = cldnn::layout( DataTypeFromPrecision(mutable_precision_second), 
                                          DefaultFormatForDims(op->get_output_shape(2).size()),
-                                         CldnnTensorFromIEDims(op->get_output_shape(2)));
+                                         tensor_from_dims(op->get_output_shape(2)));
 
     GPU_DEBUG_IF(debug_config->verbose >= 2) {
         GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
@@ -175,4 +177,6 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
 
 REGISTER_FACTORY_IMPL(internal, NonMaxSuppressionIEInternal);
 
-} // namespace CLDNNPlugin
+} // namespace intel_gpu
+} // namespace runtime
+} // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp b/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp
index 81393d96fa3..b939763ba87 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/normalize_l2.cpp
@@ -2,8 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "cldnn_program.h"
-#include "cldnn_common_utils.h"
+#include "intel_gpu/plugin/program.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
 
 #include "ngraph/op/normalize_l2.hpp"
 #include "ngraph/op/constant.hpp"
@@ -11,7 +11,9 @@
 #include "intel_gpu/primitives/normalize.hpp"
 #include "intel_gpu/primitives/data.hpp"
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 static void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::NormalizeL2>& op) {
     p.ValidateInputs(op, {2});
@@ -61,4 +63,6 @@ static void CreateNormalizeL2Op(Program& p, const std::shared_ptr& op) {
     p.ValidateInputs(op, {4});
@@ -49,7 +51,7 @@ static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& op) {
-                                  CldnnTensorFromIEDims(op->get_output_shape(0)),
+                                  tensor_from_dims(op->get_output_shape(0)),
                                   DataTypeFromPrecision(op->get_output_element_type(0)),
                                   static_cast<uint16_t>(axis),
                                   on_value,
@@ -62,4 +64,6 @@ static void CreateOneHotOp(Program& p, const std::shared_ptr&
 
 REGISTER_FACTORY_IMPL(v1, Pad);
 
-} // namespace CLDNNPlugin
+} // namespace intel_gpu
+} // namespace runtime
+} // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
index f474aebe6a4..253f489a303 100644
--- a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
@@ -2,8 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "cldnn_program.h"
-#include "cldnn_common_utils.h"
+#include "intel_gpu/plugin/program.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
 
 #include "ngraph/op/parameter.hpp"
 
@@ -14,7 +14,9 @@
 
 using namespace InferenceEngine;
 
-namespace CLDNNPlugin {
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
 
 static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Parameter>& op) {
     auto networkInputs = p.GetNetworkInputs();
@@ -26,11 +28,11 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Parameter>& op) {
     const auto inputDesc = inputInfo->getTensorDesc();
     const auto inputDims = inputDesc.getDims();
 
-    Layout l = inputDesc.getLayout();
-    Precision ip = inputDesc.getPrecision();
+    InferenceEngine::Layout l = inputDesc.getLayout();
+    InferenceEngine::Precision ip = inputDesc.getPrecision();
 
     cldnn::format inputFormat = cldnn::format::bfyx;
-    if (Layout::BLOCKED == l && 6 == inputDims.size()) {
+    if (InferenceEngine::Layout::BLOCKED == l && 6 == inputDims.size()) {
         inputFormat = cldnn::format::bfwzyx;
     } else {
         inputFormat = FormatFromLayout(l);
@@ -46,7 +48,7 @@ static void CreateParameterOp(Program& p, const std::shared_ptrname();
         }
         break;
     case 2:
-        if (Layout::NCHW == l || NC == l) {
+        if (InferenceEngine::Layout::NCHW == l || NC == l) {
             dataTensor = cldnn::tensor(batch, TensorValue(inputDims[1]),
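The input-format selection in the CreateParameterOp hunk above reduces to one decision. A sketch using the helper names visible in this diff; the wrapper function itself is illustrative:

    // 6D BLOCKED inputs have no dedicated IE layout enum, so they are pinned
    // to bfwzyx; every other layout goes through the plugin's FormatFromLayout.
    static cldnn::format pick_input_format(InferenceEngine::Layout l, std::size_t rank) {
        if (InferenceEngine::Layout::BLOCKED == l && rank == 6)
            return cldnn::format::bfwzyx;
        return FormatFromLayout(l);
    }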
1, 1); } else { IE_THROW() << "Unsupported layout (" << l << ") in 2D input " << inputInfo->name(); @@ -177,8 +179,8 @@ static void CreateParameterOp(Program& p, const std::shared_ptrname(); } @@ -280,4 +282,6 @@ static void CreateParameterOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), DataTypeFromPrecision(op->get_output_element_type(0)), op->get_friendly_name()); poolPrim.pad_end = params.pad_end; @@ -89,7 +91,7 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptrget_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), DataTypeFromPrecision(op->get_output_element_type(0)), op->get_friendly_name()); poolPrim.pad_end = params.pad_end; @@ -100,4 +102,6 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {2}); @@ -114,4 +116,6 @@ static void CreatePriorBoxOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {3}); @@ -61,7 +63,7 @@ static void CreateProposalOp(Program& p, const std::shared_ptrget_output_shape(1).size()), - CldnnTensorFromIEDims(op->get_output_shape(1))); + tensor_from_dims(op->get_output_shape(1))); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->verbose >= 2) { @@ -153,4 +155,6 @@ static void CreateProposalOp(Program& p, const std::shared_ptr& op, cldnn::reduce_mode mode, bool keep_dims) { p.ValidateInputs(op, {2}); @@ -173,4 +175,6 @@ REGISTER_FACTORY_IMPL(v1, ReduceSum); REGISTER_FACTORY_IMPL(v4, ReduceL1); REGISTER_FACTORY_IMPL(v4, ReduceL2); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/region_yolo.cpp b/src/plugins/intel_gpu/src/plugin/ops/region_yolo.cpp index fac0cb0573b..ffa80c02c51 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/region_yolo.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/region_yolo.cpp @@ -2,14 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/region_yolo.hpp" #include "intel_gpu/primitives/region_yolo.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateRegionYoloOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1}); @@ -37,4 +39,6 @@ static void CreateRegionYoloOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1}); @@ -29,4 +31,6 @@ static void CreateReorgYoloOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); @@ -21,7 +23,7 @@ static void CreateCommonReshapeOp(Program& p, const std::shared_ptrget_input_shape(0); auto outDims = op->get_output_shape(0); - auto outTensor = CldnnTensorFromIEDims(outDims); + auto outTensor = tensor_from_dims(outDims); // if we convert from or to 5D/6D, additional reorder also required to change format cldnn::primitive_id reshapeInputId = inputPrimitives[0]; @@ -74,4 +76,6 @@ REGISTER_FACTORY_IMPL(v1, Reshape); REGISTER_FACTORY_IMPL(v0, Squeeze); REGISTER_FACTORY_IMPL(v0, Unsqueeze); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/result.cpp b/src/plugins/intel_gpu/src/plugin/ops/result.cpp index d591a47da55..2f6a061c8ff 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/result.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/result.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include 
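Proposal here, like TopK, NMS, and Loop elsewhere in this patch, works around single-output GPU primitives the same way. A condensed sketch assembled from calls visible in these hunks; the primitive id scheme is illustrative:

    // Back the extra ngraph output with explicitly allocated memory and wire
    // it in as mutable_data, since the legacy primitive exposes one output.
    cldnn::layout extra_layout(DataTypeFromPrecision(op->get_output_element_type(1)),
                               DefaultFormatForDims(op->get_output_shape(1).size()),
                               tensor_from_dims(op->get_output_shape(1)));
    auto mem = p.GetEngine().allocate_memory(extra_layout);
    auto md = cldnn::mutable_data(layerName + ".out1", {layerName}, mem, op->get_friendly_name());
    p.AddPrimitive(md);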
"cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/result.hpp" @@ -11,7 +11,9 @@ using namespace InferenceEngine; -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateResultOp(Program& p, const std::shared_ptr& op) { OutputsDataMap networkOutputs = p.GetNetworkOutputs(); @@ -73,4 +75,6 @@ static void CreateResultOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {2}); @@ -31,4 +33,6 @@ static void CreateReverseSequenceOp(Program& p, const std::shared_ptr name_mapping = { {"sigmoid", cldnn::activation_func::logistic}, @@ -276,7 +278,7 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) }; cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape"; - auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize), op->get_friendly_name()); + auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, tensor_from_dims(WRreshapeSize), op->get_friendly_name()); p.AddPrimitive(reshapeInPrim); p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op); @@ -353,4 +355,6 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { namespace { @@ -39,4 +41,6 @@ void CreateROIAlignOp(Program& p, const std::shared_ptr= rank) IE_THROW() << "ScatterElementsUpdate axis is not correspond to number of dimensions"; - // Difference in dimension ordering between IE and clDNN, + // Difference in dimension ordering between IE and GPU plugin, // reverse spatial dimensions after batch and feature. unsigned cldnn_axis = axis; if (axis >= 2) { @@ -66,4 +68,6 @@ static void CreateScatterElementsUpdateOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {3}); @@ -31,4 +33,6 @@ static void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr= rank) IE_THROW() << "ScatterUpdate axis is not correspond to number of dimensions"; - // Difference in dimension ordering between IE and clDNN, + // Difference in dimension ordering between IE and GPU plugin, // reverse spatial dimensions after batch and feature. 
unsigned cldnn_axis = axis; if (axis >= 2) { @@ -66,4 +68,6 @@ static void CreateScatterUpdateOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {3}); @@ -61,7 +63,7 @@ static void CreateSelectOp(Program& p, const std::shared_ptrget_friendly_name()); @@ -89,4 +91,6 @@ static void CreateSelectOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); @@ -45,4 +47,6 @@ static void CreateShuffleChannelsOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {4}); @@ -35,7 +37,7 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptrget_output_shape(0)); + auto out_size = tensor_from_dims(op->get_output_shape(0)); auto batchToSpacePrim = cldnn::space_to_batch(layerName, inputPrimitives[0], // input @@ -51,4 +53,6 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr& op) { auto inputPrimitives = p.GetInputPrimitiveIDs(op); @@ -37,8 +39,8 @@ static void CreateCommonSplitOp(Program& p, const std::shared_ptr& } NGRAPH_SUPPRESS_DEPRECATED_END - auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1); - auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0); + auto outTensor = tensor_from_dims(outLayerDims, 1); + auto offsetTensor = tensor_from_dims(startOffset, 0); auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor, op->get_friendly_name()); p.primitiveIDs[outLayerName] = outLayerName; @@ -71,4 +73,6 @@ static void CreateVariadicSplitOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {4}); @@ -187,7 +189,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_friendly_name() + "/Reshape_before"; auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape, op->get_friendly_name()); p.AddPrimitive(reshapePrim); @@ -211,8 +213,8 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_friendly_name()); @@ -221,7 +223,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_friendly_name() + "/Crop"; auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape, op->get_friendly_name()); p.AddPrimitive(reshapePrim); @@ -249,7 +251,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptrget_output_shape(0)); + auto out_size = tensor_from_dims(op->get_output_shape(0)); auto stridedSlicePrim = cldnn::strided_slice(layerName, inputPrimitives[0], @@ -269,4 +271,6 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr @@ -24,7 +23,9 @@ using TensorIterator = ngraph::op::v0::TensorIterator; -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { template static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) { @@ -39,7 +40,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha const int32_t output_idx) { const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx)); const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size()); - const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); + const auto tensor = tensor_from_dims(op->get_output_shape(output_idx)); cldnn::layout output_layout = cldnn::layout(precision, format, tensor); auto mem = p.GetEngine().allocate_memory(output_layout); auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency @@ -199,4 +200,6 @@ static void CreateTensorIteratorOp(Program &p, const std::shared_ptr& 
op) { p.ValidateInputs(op, {2}); @@ -18,7 +20,7 @@ static void CreateTileOp(Program& p, const std::shared_ptr auto tilePrim = cldnn::tile(layerName, inputPrimitives[0], - CldnnTensorFromIEDims(op->get_output_shape(0)), + tensor_from_dims(op->get_output_shape(0)), op->get_friendly_name()); p.AddPrimitive(tilePrim); @@ -27,4 +29,6 @@ static void CreateTileOp(Program& p, const std::shared_ptr REGISTER_FACTORY_IMPL(v0, Tile); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp index 423e53968bf..e74b2004d79 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/topk.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/topk.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/topk.hpp" @@ -11,7 +11,9 @@ #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static cldnn::arg_max_min::axis_name GetAxis(int32_t axis, size_t in_rank) { if (in_rank == 5) { @@ -70,7 +72,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr cldnn::layout mutableLayout = cldnn::layout(DataTypeFromPrecision(mutable_precision), DefaultFormatForDims(op->get_output_shape(1).size()), - CldnnTensorFromIEDims(op->get_output_shape(1))); + tensor_from_dims(op->get_output_shape(1))); GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->verbose >= 2) { @@ -130,4 +132,6 @@ static void CreateTopKOp(Program& p, const std::shared_ptr REGISTER_FACTORY_IMPL(v1, TopK); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp b/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp index db1a481acf6..7e888ca333a 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp @@ -2,15 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "cldnn_program.h" -#include "cldnn_common_utils.h" +#include "intel_gpu/plugin/program.hpp" +#include "intel_gpu/plugin/common_utils.hpp" #include "ngraph/op/transpose.hpp" #include "ngraph/op/constant.hpp" #include "intel_gpu/primitives/permute.hpp" -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { static void CreateTransposeOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); @@ -46,4 +48,6 @@ static void CreateTransposeOp(Program& p, const std::shared_ptr& op, cldnn::activation_func func, cldnn::activation_additional_params params) { @@ -313,4 +315,6 @@ REGISTER_FACTORY_IMPL(v0, Sign); REGISTER_FACTORY_IMPL(v5, HSigmoid); REGISTER_FACTORY_IMPL(v5, Round); -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_engine.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp similarity index 88% rename from src/plugins/intel_gpu/src/plugin/cldnn_engine.cpp rename to src/plugins/intel_gpu/src/plugin/plugin.cpp index dc7efc47bcd..3afb4e75289 100644 --- a/src/plugins/intel_gpu/src/plugin/cldnn_engine.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -16,11 +16,11 @@ #include #include -#include "cldnn_engine.h" -#include 
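Tile and TopK above both push ngraph output shapes through tensor_from_dims. Its body lives in common_utils.hpp and is not part of this diff, so the mapping below is an assumption about what the helper does, shown only for orientation: batch, feature, then spatial dims in reverse order, with missing entries padded by a default.

    inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, int def = 1) {
        switch (dims.size()) {
        case 0: return cldnn::tensor(cldnn::batch(def), cldnn::feature(def), cldnn::spatial(def, def));
        case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
        case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, def));
        case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, dims[2]));
        case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
        case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
        case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
        default: IE_THROW() << "Invalid dimensions size (" << dims.size() << ") for GPU tensor";
        }
    }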
"cldnn_executable_network.h" -#include "cldnn_transformations_pipeline.h" -#include "cldnn_custom_layer.h" -#include "cldnn_itt.h" +#include "intel_gpu/plugin/plugin.hpp" +#include "intel_gpu/plugin/compiled_model.hpp" +#include "intel_gpu/plugin/transformations_pipeline.hpp" +#include "intel_gpu/plugin/custom_layer.hpp" +#include "intel_gpu/plugin/itt.hpp" #include "gpu/gpu_config.hpp" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" @@ -41,7 +41,9 @@ using namespace InferenceEngine; using namespace InferenceEngine::gpu; using namespace InferenceEngine::details; -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { #define FACTORY_DECLARATION(op_version, op_name) \ void __register ## _ ## op_name ## _ ## op_version(); @@ -50,20 +52,20 @@ namespace CLDNNPlugin { __register ## _ ## op_name ## _ ## op_version(); #define REGISTER_FACTORY(op_version, op_name) FACTORY_DECLARATION(op_version, op_name) -#include "cldnn_primitives_list.hpp" +#include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY -void clDNNEngine::RegisterPrimitives() { +void Plugin::RegisterPrimitives() { #define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name) - #include "cldnn_primitives_list.hpp" + #include "intel_gpu/plugin/primitives_list.hpp" #undef REGISTER_FACTORY } -struct clDNNEngine::impl { - CLDNNPlugin::Configs m_configs; +struct Plugin::impl { + Configs m_configs; }; -std::string clDNNEngine::GetDeviceIDFromConfig(const std::map& config) const { +std::string Plugin::GetDeviceIDFromConfig(const std::map& config) const { std::string device_id; if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) { device_id = config.at(PluginConfigParams::KEY_DEVICE_ID); @@ -71,7 +73,7 @@ std::string clDNNEngine::GetDeviceIDFromConfig(const std::map &config) const { +cldnn::device_info Plugin::GetDeviceInfo(const std::map &config) const { auto device_info = device_map.begin()->second->get_info(); std::string device_id = GetDeviceIDFromConfig(config); if (!device_id.empty()) { @@ -84,9 +86,9 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map(); RegisterPrimitives(); - // try loading clDNN engine and get info from it + // try loading gpu engine and get info from it { // Set OCL runtime which should be always available cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl); @@ -124,12 +126,12 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) { CHAR mpath[MAX_PATH + 1]; HMODULE nModule; GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - (LPCSTR)CLDNNCustomLayer::LoadFromFile, + (LPCSTR)CustomLayer::LoadFromFile, &nModule); GetModuleFileName(nModule, mpath, sizeof(mpath)); #elif __linux__ Dl_info dl_info; - dladdr(reinterpret_cast(CLDNNCustomLayer::LoadFromFile), &dl_info); + dladdr(reinterpret_cast(CustomLayer::LoadFromFile), &dl_info); const char* mpath = dl_info.dli_fname; #endif std::string configFile(mpath); @@ -142,7 +144,7 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) { } config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml"; for (auto& config : _impl->m_configs) { - CLDNNCustomLayer::LoadFromFile(config_path, config.second.customLayers, true); + CustomLayer::LoadFromFile(config_path, config.second.customLayers, true); } } @@ -164,8 +166,8 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) { } }; -void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const 
 InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateConfig");
+void Plugin::UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateConfig");
     auto device_info = GetDeviceInfo(params);
     conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
     conf.UpdateFromMap(params);
@@ -174,8 +176,8 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine:
     }
 }
 
-void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateStatistics");
+void Plugin::UpdateStatistics(const RemoteCLContext::Ptr& context) const {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateStatistics");
     {
         std::lock_guard<std::mutex> lock(engine_mutex);
@@ -193,9 +195,9 @@ void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) con
     }
 }
 
-std::map<std::string, std::string> clDNNEngine::ConvertPerfHintsToConfig(
+std::map<std::string, std::string> Plugin::ConvertPerfHintsToConfig(
         const std::map<std::string, std::string>& network_config,
-        const CLDNNPlugin::Config& plugin_config) const {
+        const Config& plugin_config) const {
     // deduces the actual settings from the performance hints and returns fully-defined config
     auto config = network_config;
     const auto &mode = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
@@ -223,21 +225,21 @@ std::map<std::string, std::string> clDNNEngine::ConvertPerfHintsToConfig(
     return config;
 }
 
-IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
+IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                            const std::map<std::string, std::string> &orig_config) {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl");
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl");
     // verification of supported input
     InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
     check_inputs(_networkInputs);
 
-    CLDNNPlugin::Configs confs = _impl->m_configs;
+    Configs confs = _impl->m_configs;
     std::string device_id = GetDeviceIDFromConfig(orig_config);
-    CLDNNPlugin::Config conf = confs.GetConfig(device_id);
+    Config conf = confs.GetConfig(device_id);
 
     auto config = ConvertPerfHintsToConfig(orig_config, conf);
     UpdateConfig(conf, network, config);
 
-    CLDNNRemoteCLContext::Ptr context;
+    RemoteCLContext::Ptr context;
 
     auto canReuseDefaultContext = [&]() -> bool {
         if (m_defaultContext == nullptr)
@@ -263,10 +265,10 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
     };
 
     {
-        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateContext");
+        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext");
         std::lock_guard<std::mutex> lock(engine_mutex);
         if (!canReuseDefaultContext()) {
-            m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), ParamMap(), conf));
+            m_defaultContext.reset(new RemoteCLContext(shared_from_this(), ParamMap(), conf));
         }
     }
 
@@ -274,16 +276,16 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
     auto transformedNetwork = CloneAndTransformNetwork(network, conf);
     {
-        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork");
-        CLDNNExecNetwork::Ptr exeNetwork = std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
+
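ConvertPerfHintsToConfig, shown above, turns a high-level hint into concrete plugin options. A hedged sketch of the core deduction (the real method also validates values and handles the LATENCY hint):

    // If the user asked for THROUGHPUT and did not pin a stream count,
    // let the plugin choose one automatically.
    std::map<std::string, std::string> convert_hints(std::map<std::string, std::string> config) {
        auto mode = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
        if (mode != config.end() &&
            mode->second == PluginConfigParams::THROUGHPUT &&
            !config.count(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS)) {
            config[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = PluginConfigParams::GPU_THROUGHPUT_AUTO;
        }
        return config;
    }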
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); + CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, conf); UpdateStatistics(context); return exeNetwork; } } -IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, - const RemoteContext::Ptr &context, - const std::map &orig_config) { +IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, + const InferenceEngine::RemoteContext::Ptr &context, + const std::map &orig_config) { InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); check_inputs(_networkInputs); @@ -292,39 +294,39 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE IE_THROW() << "Invalid context"; } - CLDNNPlugin::Config conf = getContextImpl(casted)->GetConfig(); + Config conf = getContextImpl(casted)->GetConfig(); auto config = ConvertPerfHintsToConfig(orig_config, conf); UpdateConfig(conf, network, config); auto transformedNetwork = CloneAndTransformNetwork(network, conf); - return std::make_shared(transformedNetwork, casted, conf); + return std::make_shared(transformedNetwork, casted, conf); } -RemoteContext::Ptr clDNNEngine::CreateContext(const ParamMap& params) { +InferenceEngine::RemoteContext::Ptr Plugin::CreateContext(const ParamMap& params) { // parameter map is non-empty std::string contextTypeStr = _StrFromParams(params, GPU_PARAM_KEY(CONTEXT_TYPE)); if (GPU_PARAM_VALUE(OCL) == contextTypeStr) { - return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); + return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); } else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) { #ifdef _WIN32 - return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); + return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); #else - return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); + return std::make_shared(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()); #endif } else { IE_THROW() << "Invalid remote context type" << contextTypeStr; } } -RemoteContext::Ptr clDNNEngine::GetDefaultContext(const ParamMap& params) { +InferenceEngine::RemoteContext::Ptr Plugin::GetDefaultContext(const ParamMap& params) { if (nullptr == m_defaultContext) { - m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig())); + m_defaultContext.reset(new RemoteCLContext(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig())); } return m_defaultContext; } -void clDNNEngine::SetConfig(const std::map &config) { +void Plugin::SetConfig(const std::map &config) { streamsSet = (config.find(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) != config.end()); throttlingSet = config.find(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) != config.end() || config.find(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) != config.end(); @@ -345,18 +347,18 @@ void clDNNEngine::SetConfig(const std::map &config) { } } -QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network, - const std::map& config) const { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::QueryNetwork"); +QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network, + const std::map& config) const { + 
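From the application side, the CreateContext dispatch above is driven by the CONTEXT_TYPE entry of the ParamMap. A hedged usage sketch through the public IE Core API:

    #include <ie_core.hpp>
    #include <gpu/gpu_params.hpp>

    int main() {
        InferenceEngine::Core core;
        InferenceEngine::ParamMap ctx_params = {
            { GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL) },  // plain OpenCL context
        };
        auto remote_ctx = core.CreateContext("GPU", ctx_params);
        (void)remote_ctx;
    }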
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::QueryNetwork"); QueryNetworkResult res; - CLDNNPlugin::Configs confs = _impl->m_configs; + Configs confs = _impl->m_configs; std::string device_id = GetDeviceIDFromConfig(config); - CLDNNPlugin::Config conf = confs.GetConfig(device_id); + Config conf = confs.GetConfig(device_id); UpdateConfig(conf, network, config); if (m_defaultContext == nullptr) { - m_defaultContext.reset(new CLDNNRemoteCLContext( + m_defaultContext.reset(new RemoteCLContext( std::const_pointer_cast(shared_from_this()), ParamMap(), conf)); } @@ -568,8 +570,8 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network, return res; } -Parameter clDNNEngine::GetConfig(const std::string& name, const std::map& options) const { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetConfig"); +Parameter Plugin::GetConfig(const std::string& name, const std::map& options) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetConfig"); Parameter result; std::string device_id; @@ -655,8 +657,8 @@ static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) { return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs; } -Parameter clDNNEngine::GetMetric(const std::string& name, const std::map& options) const { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric"); +Parameter Plugin::GetMetric(const std::string& name, const std::map& options) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetMetric"); std::string device_id = GetConfig(CONFIG_KEY(DEVICE_ID), options); auto iter = device_map.find(device_id); @@ -813,7 +815,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::mapsecond, nullptr); + auto engine_params = Plugin::GetParams(config, iter->second, nullptr); auto engine = cldnn::engine::create(engine_params.engine_type, engine_params.runtime_type, iter->second, cldnn::engine_configuration(false, engine_params.queue_type, std::string(), config.queuePriority, config.queueThrottle, config.memory_pool_on, @@ -835,7 +837,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::mapgetLayout(); + InferenceEngine::Layout layout = info.second->getLayout(); auto data = info.second->getInputData(); if (!data) continue; @@ -885,7 +887,9 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map Program::BuildProgram(const std::vector Program::BuildProgram(const std::vector Program::BuildProgram(const std::vector& op) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::IsOpSupported"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::IsOpSupported"); cldnn::topology topology; try { // Query mode disables checks that input primitives are created, @@ -232,7 +234,7 @@ bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st } void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateSingleLayerPrimitive"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateSingleLayerPrimitive"); InitProfileInfo(op->get_friendly_name(), op->get_type_name()); GPU_DEBUG_GET_INSTANCE(debug_config); @@ -355,4 +357,6 @@ bool IsNodeOnConstPath(const std::shared_ptr& node) { return is_const_node(node); } -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_remote_context.cpp 
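GetGOPS above is a straight product of device properties. Worked numbers, purely illustrative rather than a real device query:

    #include <cstdio>

    int main() {
        // Peak GOPS = frequency (GHz) * ops per compute block * block IPC * EU count.
        float freqGHz = 1.2f, opsPerComputeBlock = 16.0f, computeBlockIPC = 2.0f, numEUs = 96.0f;
        std::printf("%.1f GOPS\n", freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs);  // 3686.4
    }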
b/src/plugins/intel_gpu/src/plugin/remote_context.cpp
similarity index 86%
rename from src/plugins/intel_gpu/src/plugin/cldnn_remote_context.cpp
rename to src/plugins/intel_gpu/src/plugin/remote_context.cpp
index d18c6ceb286..45ace3d9195 100644
--- a/src/plugins/intel_gpu/src/plugin/cldnn_remote_context.cpp
+++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp
@@ -3,19 +3,21 @@
 //
 
 #include <memory>
-#include "cldnn_remote_context.h"
-#include "cldnn_itt.h"
-#include "cldnn_engine.h"
+#include "intel_gpu/plugin/remote_context.hpp"
+#include "intel_gpu/plugin/itt.hpp"
+#include "intel_gpu/plugin/plugin.hpp"
 #include "intel_gpu/runtime/device_query.hpp"
 
 using namespace InferenceEngine;
 using namespace InferenceEngine::gpu;
 using namespace InferenceEngine::details;
 
-namespace CLDNNPlugin {
-CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
+namespace ov {
+namespace runtime {
+namespace intel_gpu {
+RemoteAllocator RemoteBlobImpl::m_allocator;
 
-CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
+RemoteBlobImpl::RemoteBlobImpl(ClContext::Ptr context,
         cldnn::stream& stream,
         const cldnn::layout& layout,
         cldnn::shared_handle mem,
@@ -26,7 +28,7 @@ CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
     _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
 }
 
-ParamMap CLDNNRemoteBlobImpl::getParams() const {
+ParamMap RemoteBlobImpl::getParams() const {
     assert(m_memObject != nullptr);
     auto params = m_memObject->get_internal_params();
@@ -86,21 +88,21 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const {
     }
 }
 
-bool CLDNNRemoteBlobImpl::deallocate() noexcept {
+bool RemoteBlobImpl::deallocate() noexcept {
     m_memObject.reset();
     return m_memObject == nullptr;
 }
 
-bool CLDNNRemoteBlobImpl::is_allocated() const noexcept {
+bool RemoteBlobImpl::is_allocated() const noexcept {
     return m_memObject != nullptr;
 }
 
-bool CLDNNRemoteBlobImpl::is_locked() const noexcept {
+bool RemoteBlobImpl::is_locked() const noexcept {
     return lockedHolder != nullptr;
 }
 
-void CLDNNRemoteBlobImpl::allocate() {
-    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::Allocate");
+void RemoteBlobImpl::allocate() {
+    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate");
     assert(m_memObject == nullptr);
 
     auto _impl = getContextImpl(m_context.lock());
@@ -153,22 +155,22 @@ void CLDNNRemoteBlobImpl::allocate() {
     _impl->release_lock();
 }
 
-const std::shared_ptr<IAllocator>& CLDNNRemoteBlobImpl::getAllocator() const noexcept {
+const std::shared_ptr<IAllocator>& RemoteBlobImpl::getAllocator() const noexcept {
     if (!_allocator) {
         _allocator = std::shared_ptr<IAllocator>(&m_allocator, [] (IAllocator*) {});
     }
     return _allocator;
 };
 
-std::string CLDNNRemoteBlobImpl::getDeviceName() const noexcept {
+std::string RemoteBlobImpl::getDeviceName() const noexcept {
     return getContextImpl(m_context.lock())->getDeviceName();
 };
 
-std::shared_ptr<RemoteContext> CLDNNRemoteBlobImpl::getContext() const noexcept {
+std::shared_ptr<RemoteContext> RemoteBlobImpl::getContext() const noexcept {
     return m_context.lock();
 }
 
-void CLDNNRemoteBlobImpl::lock() const {
+void RemoteBlobImpl::lock() const {
     if (!is_allocated()) {
         IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated";
     }
@@ -178,36 +180,36 @@ void CLDNNRemoteBlobImpl::lock() const {
     m_allocator.regLockedBlob(_handle, this);
 }
 
-void CLDNNRemoteBlobImpl::unlock() const {
+void RemoteBlobImpl::unlock() const {
     lockedHolder.reset();
 }
 
-LockedMemory<void> CLDNNRemoteBlobImpl::buffer() noexcept {
+LockedMemory<void> RemoteBlobImpl::buffer() noexcept {
     lock();
     return LockedMemory<void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
 }
 
-LockedMemory<const void> CLDNNRemoteBlobImpl::cbuffer() const noexcept {
+LockedMemory<const void> RemoteBlobImpl::cbuffer() const noexcept {
     lock();
     return LockedMemory<const void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
 }
 
-LockedMemory<void> CLDNNRemoteBlobImpl::rwmap()noexcept {
+LockedMemory<void> RemoteBlobImpl::rwmap()noexcept {
     lock();
     return LockedMemory<void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
 }
 
-LockedMemory<const void> CLDNNRemoteBlobImpl::rmap() const noexcept {
+LockedMemory<const void> RemoteBlobImpl::rmap() const noexcept {
     lock();
     return LockedMemory<const void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
 }
 
-LockedMemory<void> CLDNNRemoteBlobImpl::wmap()noexcept {
+LockedMemory<void> RemoteBlobImpl::wmap()noexcept {
     lock();
     return LockedMemory<void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
 }
 
-void CLDNNRemoteAllocator::regLockedBlob(void* handle, const CLDNNRemoteBlobImpl* blob) {
+void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) {
     acquire_lock();
     auto iter = m_lockedBlobs.find(handle);
     if (iter == m_lockedBlobs.end()) {
@@ -216,7 +218,7 @@ void CLDNNRemoteAllocator::regLockedBlob(void* handle, const CLDNNRemoteBlobImpl
     release_lock();
 }
 
-void CLDNNRemoteAllocator::unlock(void* handle) noexcept {
+void RemoteAllocator::unlock(void* handle) noexcept {
     acquire_lock();
     auto iter = m_lockedBlobs.find(handle);
     if (iter != m_lockedBlobs.end()) {
@@ -226,7 +228,7 @@ void CLDNNRemoteAllocator::unlock(void* handle) noexcept {
     release_lock();
 }
 
-CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
+ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
         const ParamMap& params,
         const Config& config)
     : m_plugin(plugin),
@@ -278,7 +280,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptrget_user_context() } };
     switch (m_type) {
@@ -312,7 +314,7 @@ ParamMap CLDNNExecutionContextImpl::getParams() const {
     return ret;
 }
 
-std::string CLDNNExecutionContextImpl::getDeviceName() const noexcept {
+std::string ExecutionContextImpl::getDeviceName() const noexcept {
     auto devName = m_plugin.lock()->GetName();
 
     auto engine_type = cldnn::engine_types::ocl;
@@ -334,4 +336,6 @@ std::string CLDNNExecutionContextImpl::getDeviceName() const noexcept {
     return devName;
 }
 
-}; // namespace CLDNNPlugin
+} // namespace intel_gpu
+} // namespace runtime
+} // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/simple_math.cpp b/src/plugins/intel_gpu/src/plugin/simple_math.cpp
index d8bf376a575..02a7d5bc185 100644
--- a/src/plugins/intel_gpu/src/plugin/simple_math.cpp
+++ b/src/plugins/intel_gpu/src/plugin/simple_math.cpp
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "simple_math.h"
+#include "intel_gpu/plugin/simple_math.hpp"
 #include
 #include
 #include
diff --git a/src/plugins/intel_gpu/src/plugin/cldnn_transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
similarity index 97%
rename from src/plugins/intel_gpu/src/plugin/cldnn_transformations_pipeline.cpp
rename to src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index f32d4be4383..fed8401da43 100644
--- a/src/plugins/intel_gpu/src/plugin/cldnn_transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -12,7 +12,7 @@
 #include
 #include
 
-#include "cldnn_transformations_pipeline.h"
+#include "intel_gpu/plugin/transformations_pipeline.hpp"
 #include "ie_metric_helpers.hpp"
 #include "ie_plugin_config.hpp"
@@ -86,7 +86,7 @@
 #include
 #include
 
-#include "cldnn_itt.h"
+#include
"intel_gpu/plugin/itt.hpp" namespace { template @@ -99,10 +99,12 @@ static bool disableReduceDecomposition(const std::shared_ptr } } // namespace -namespace CLDNNPlugin { +namespace ov { +namespace runtime { +namespace intel_gpu { void TransformationsPipeline::apply(std::shared_ptr func) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply"); using const_node_ptr = const std::shared_ptr; bool use_onednn = false; @@ -336,7 +338,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } if (enableInt8) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::lpt"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::lpt"); using namespace ngraph::pass::low_precision; // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers @@ -421,7 +423,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::run_passes"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::run_passes"); ngraph::pass::Manager manager; // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation // TODO: check why we have these reshapes @@ -442,4 +444,6 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.run_passes(func); } } -} // namespace CLDNNPlugin +} // namespace intel_gpu +} // namespace runtime +} // namespace ov