[GPU] Plugin refactoring (#9068)

* [GPU] Plugin files renaming

* [GPU] Updated plugin namespace to ov::runtime::intel_gpu

* [GPU] Renamed plugin classes to get rid of cldnn prefix
Vladimir Paramuzov 2021-12-08 11:03:24 +03:00 committed by GitHub
parent ee4643d97e
commit e04ca1516d
86 changed files with 1160 additions and 842 deletions
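The rename follows one uniform pattern across all 86 files; a minimal before/after sketch for orientation (identifiers taken from the diffs below, not a new file in the commit):

// Before: CLDNN-prefixed classes in the flat CLDNNPlugin namespace
namespace CLDNNPlugin {
class CLDNNInferRequest;
}  // namespace CLDNNPlugin

// After: prefix dropped, headers moved under intel_gpu/plugin/, nested namespace
namespace ov {
namespace runtime {
namespace intel_gpu {
class InferRequest;
}  // namespace intel_gpu
}  // namespace runtime
}  // namespace ov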

View File

@ -16,14 +16,14 @@ if(ENABLE_GPU_DEBUG_CAPS)
add_definitions(-DGPU_DEBUG_CONFIG=1)
endif()
file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.h)
file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.hpp)
addVersionDefines(src/plugin/cldnn_engine.cpp CI_BUILD_NUMBER CLDNN_VERSION)
addVersionDefines(src/plugin/plugin.cpp CI_BUILD_NUMBER)
ie_add_plugin(NAME ${TARGET_NAME}
DEVICE_NAME "GPU"
SOURCES ${PLUGIN_SOURCES}
VERSION_DEFINES_FOR src/plugin/cldnn_engine.cpp)
VERSION_DEFINES_FOR src/plugin/plugin.cpp)
target_compile_options(${TARGET_NAME} PRIVATE
$<$<CONFIG:Release>:$<IF:$<CXX_COMPILER_ID:MSVC>,/Os,-Os>>)
@ -35,7 +35,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE ov_intel_gpu_graph
ngraph)
target_include_directories(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/
${CMAKE_CURRENT_SOURCE_DIR}/include/
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})

View File

@ -0,0 +1,36 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <map>
#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
#include "intel_gpu/plugin/infer_request.hpp"
namespace ov {
namespace runtime {
namespace intel_gpu {
class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
AsyncInferRequest(const InferRequest::Ptr &inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
~AsyncInferRequest();
void Infer_ThreadUnsafe() override;
void StartAsync_ThreadUnsafe() override;
private:
InferRequest::Ptr _inferRequest;
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
};
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
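As a hedged usage sketch (the executor and request variables are assumed to exist; the real wiring appears in compiled_model.cpp later in this commit), the renamed async wrapper is constructed like this:

auto async_request = std::make_shared<ov::runtime::intel_gpu::AsyncInferRequest>(
    sync_request,        // InferRequest::Ptr produced by CompiledModel
    task_executor,       // runs setup_stream_graph/preprocess/enqueue
    wait_executor,       // waits for GPU completion
    callback_executor);  // runs user callbacks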

View File

@ -1,32 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <map>
#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
#include "cldnn_infer_request.h"
namespace CLDNNPlugin {
class CLDNNAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
public:
using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
CLDNNAsyncInferRequest(const CLDNNInferRequest::Ptr &inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
~CLDNNAsyncInferRequest();
void Infer_ThreadUnsafe() override;
void StartAsync_ThreadUnsafe() override;
private:
CLDNNInferRequest::Ptr _inferRequest;
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
};
} // namespace CLDNNPlugin

View File

@ -1,20 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief Defines openvino domains for tracing
* @file cldnn_itt.h
*/
#pragma once
#include <openvino/itt.hpp>
namespace CLDNNPlugin {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(CLDNNPlugin);
}
}
}

View File

@ -9,11 +9,13 @@
#include "ngraph/type/element_type.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def = 1) {
inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, int def = 1) {
switch (dims.size()) {
case 0: return cldnn::tensor(cldnn::batch(def), cldnn::feature(def), cldnn::spatial(def, def));
case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
@ -22,9 +24,9 @@ const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, i
case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for clDNN tensor";
default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for gpu tensor";
}
};
}
inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
switch (p) {
@ -185,4 +187,6 @@ inline std::vector<uint16_t> ConvertPermuteOrder(const std::vector<uint16_t>& ie
return cldnn_order;
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
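For reference, the renamed helper maps IE dims (NCHW-like order) onto a cldnn::tensor with the spatial axes reversed; an illustrative call with arbitrary values, assuming the new header is included:

InferenceEngine::SizeVector dims = {1, 3, 224, 224};         // N, C, H, W
auto t = ov::runtime::intel_gpu::tensor_from_dims(dims);     // batch(1), feature(3), spatial(224, 224)
// case 4 above expands to cldnn::spatial(dims[3], dims[2]), i.e. spatial(W, H)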

View File

@ -13,17 +13,19 @@
#include "ie_blob.h"
#include "cpp/ie_cnn_network.h"
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_graph.h"
#include "cldnn_config.h"
#include "cldnn_remote_context.h"
#include "intel_gpu/plugin/graph.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
class CLDNNExecNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
typedef std::shared_ptr<CLDNNExecNetwork> Ptr;
typedef std::shared_ptr<CompiledModel> Ptr;
CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
@ -36,11 +38,13 @@ public:
InferenceEngine::Parameter GetConfig(const std::string &name) const override;
std::shared_ptr<InferenceEngine::RemoteContext> GetContext() const override;
std::vector<std::shared_ptr<CLDNNGraph>> m_graphs;
std::vector<std::shared_ptr<Graph>> m_graphs;
InferenceEngine::gpu::ClContext::Ptr m_context;
Config m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
};
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -12,15 +12,17 @@
#include "pugixml.hpp"
#include "intel_gpu/runtime/tensor.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
using CLDNNCustomLayerPtr = std::shared_ptr<class CLDNNCustomLayer>;
using CLDNNCustomLayerMap = std::map<std::string, CLDNNCustomLayerPtr>;
class CLDNNCustomLayer{
using CustomLayerPtr = std::shared_ptr<class CustomLayer>;
using CustomLayerMap = std::map<std::string, CustomLayerPtr>;
class CustomLayer{
public:
static void LoadFromFile(
const std::string configFile,
CLDNNCustomLayerMap& customLayers,
CustomLayerMap& customLayers,
bool can_be_missed = false);
typedef enum {
@ -57,8 +59,8 @@ public:
int InputDimSourceIndex() { return m_wgDimInputIdx; }
protected:
CLDNNCustomLayer() : m_wgDimInputIdx(0) {}
explicit CLDNNCustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
CustomLayer() : m_wgDimInputIdx(0) {}
explicit CustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
bool Error() const { return m_ErrorMessage.length() > 0; }
void LoadSingleLayer(const pugi::xml_node& node);
@ -82,4 +84,6 @@ protected:
std::string m_ErrorMessage;
};
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
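A hedged usage sketch of the renamed loader (the config file name is a placeholder; device_config.cpp below issues the same call for each file listed in KEY_CONFIG_FILE):

ov::runtime::intel_gpu::CustomLayerMap customLayers;
ov::runtime::intel_gpu::CustomLayer::LoadFromFile("custom_layers.xml", customLayers);
// customLayers now maps layer type names to CustomLayerPtr entries parsed from the XML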

View File

@ -7,12 +7,14 @@
#include <map>
#include <string>
#include "cldnn_custom_layer.h"
#include "intel_gpu/plugin/custom_layer.hpp"
#include <ie_performance_hints.hpp>
#include "intel_gpu/graph/network.hpp"
#include <threading/ie_cpu_streams_executor.hpp>
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
struct Config {
Config(std::string device_id = "0") : device_id(device_id),
@ -61,7 +63,7 @@ struct Config {
cldnn::priority_mode_types queuePriority;
cldnn::throttle_mode_types queueThrottle;
int max_dynamic_batch;
CLDNNCustomLayerMap customLayers;
CustomLayerMap customLayers;
cldnn::tuning_config_options tuningConfig;
std::string graph_dumps_dir;
std::string sources_dumps_dir;
@ -93,4 +95,6 @@ private:
std::map<std::string, Config> configs;
};
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -21,24 +21,26 @@
#include "intel_gpu/graph/topology.hpp"
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_custom_layer.h"
#include "cldnn_config.h"
#include "cldnn_remote_context.h"
#include "cldnn_program.h"
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/program.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
class CLDNNGraph {
class Graph {
public:
enum class Stage : uint32_t {
PREPROC = 1,
EXECUTE = 2,
POSTPROC = 4
};
typedef std::shared_ptr<CLDNNGraph> Ptr;
typedef std::shared_ptr<Graph> Ptr;
CLDNNGraph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id = 0);
Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
std::shared_ptr<ngraph::Function> GetExecGraphInfo();
bool IsLoaded() const;
@ -102,4 +104,6 @@ protected:
bool filter_const_primitives = true);
};
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -9,21 +9,23 @@
#include <vector>
#include <memory>
#include <atomic>
#include "cldnn_graph.h"
#include "intel_gpu/plugin/graph.hpp"
#include <threading/ie_istreams_executor.hpp>
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
struct buf_info {
size_t buf_offset;
size_t buf_size;
};
class CLDNNExecNetwork;
class CompiledModel;
class CLDNNInferRequest : public InferenceEngine::IInferRequestInternal {
class InferRequest : public InferenceEngine::IInferRequestInternal {
public:
using Ptr = std::shared_ptr<CLDNNInferRequest>;
using Ptr = std::shared_ptr<InferRequest>;
// make sure all blobs and cldnn::memory objects
// are in place and valid
void checkBlobs() override;
@ -31,21 +33,21 @@ public:
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
CLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs,
const std::shared_ptr<CLDNNExecNetwork>& execNetwork);
CLDNNInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const std::shared_ptr<CLDNNExecNetwork>& execNetwork);
InferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs,
const std::shared_ptr<CompiledModel>& execNetwork);
InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const std::shared_ptr<CompiledModel>& execNetwork);
CLDNNInferRequest(const CLDNNInferRequest &) = delete;
InferRequest(const InferRequest &) = delete;
virtual ~CLDNNInferRequest() = default;
virtual ~InferRequest() = default;
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
void SetBatch(int batch = -1) override;
void SetGraph(std::shared_ptr<CLDNNGraph> graph);
void SetGraph(std::shared_ptr<Graph> graph);
void EnableProfiling() { m_useProfiling = true; }
void EnableStreams() { m_useStreams = true; }
@ -73,7 +75,7 @@ private:
bool m_useProfiling = false;
bool m_useStreams = false;
bool m_useExternalQueue = false;
std::shared_ptr<CLDNNGraph> m_graph;
std::shared_ptr<Graph> m_graph;
// dynamic batch stuff
std::map<std::string, std::vector<buf_info>> batchInputs;
@ -102,4 +104,6 @@ private:
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
};
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -0,0 +1,24 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief Defines openvino domains for tracing
* @file itt.hpp
*/
#pragma once
#include <openvino/itt.hpp>
namespace ov {
namespace runtime {
namespace intel_gpu {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(intel_gpu_plugin);
} // namespace domains
} // namespace itt
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
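The new domain is referenced by the scoped-task macros throughout the plugin; the pattern used later in this commit is:

// Inside the ov::runtime::intel_gpu namespace:
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequest");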

View File

@ -10,14 +10,16 @@
#include "intel_gpu/runtime/engine.hpp"
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
#include "cldnn_remote_context.h"
#include "intel_gpu/plugin/remote_context.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
using CLDNNCustomLayerPtr = std::shared_ptr<class CLDNNCustomLayer>;
using CustomLayerPtr = std::shared_ptr<class CustomLayer>;
class clDNNEngine : public InferenceEngine::IInferencePlugin,
public InferenceEngine::gpu::details::param_map_obj_getter {
class Plugin : public InferenceEngine::IInferencePlugin,
public InferenceEngine::gpu::details::param_map_obj_getter {
struct impl;
std::shared_ptr<impl> _impl;
bool streamsSet = false;
@ -26,23 +28,23 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin,
// key: device_id, value: cldnn device
std::map<std::string, cldnn::device::ptr> device_map;
// key: cldnn context, value: memory statistics
mutable std::map<CLDNNRemoteCLContext::Ptr, std::map<std::string, uint64_t>> statistics_map;
mutable std::map<RemoteCLContext::Ptr, std::map<std::string, uint64_t>> statistics_map;
mutable std::mutex engine_mutex;
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
mutable RemoteCLContext::Ptr m_defaultContext;
cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const CLDNNPlugin::Config& config) const;
const Config& config) const;
std::map<std::string, std::string> ConvertPerfHintsToConfig(const std::map<std::string, std::string>& network_config,
const CLDNNPlugin::Config& plugin_config) const;
const Config& plugin_config) const;
void RegisterPrimitives();
void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const;
void UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const;
void UpdateStatistics(const RemoteCLContext::Ptr& context) const;
public:
clDNNEngine();
Plugin();
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &config) override;
@ -61,7 +63,7 @@ public:
std::shared_ptr<InferenceEngine::RemoteContext> CreateContext(const InferenceEngine::ParamMap& params) override;
std::shared_ptr<InferenceEngine::RemoteContext> GetDefaultContext(const InferenceEngine::ParamMap& params) override;
struct clDNNEngineParams {
struct PluginParams {
cldnn::queue_types queue_type;
cldnn::engine_types engine_type;
cldnn::runtime_types runtime_type;
@ -69,9 +71,9 @@ public:
InferenceEngine::ITaskExecutor::Ptr task_executor;
};
static clDNNEngineParams GetEngineParams(const Config& config, const cldnn::device::ptr& dev,
InferenceEngine::gpu_handle_param external_queue = nullptr) {
clDNNEngineParams params;
static PluginParams GetParams(const Config& config, const cldnn::device::ptr& dev,
InferenceEngine::gpu_handle_param external_queue = nullptr) {
PluginParams params;
params.engine_type = cldnn::engine_types::ocl;
params.runtime_type = cldnn::runtime_types::ocl;
if (external_queue) {
@ -87,4 +89,6 @@ public:
}
};
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
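A hedged sketch of the renamed static helper (config and dev are assumed to exist; the values noted in the comments are the ones set unconditionally in the body shown above):

auto params = ov::runtime::intel_gpu::Plugin::GetParams(config, dev);
// params.engine_type  == cldnn::engine_types::ocl
// params.runtime_type == cldnn::runtime_types::ocl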

View File

@ -61,6 +61,7 @@ REGISTER_FACTORY(v0, Squeeze);
REGISTER_FACTORY(v0, ShuffleChannels);
REGISTER_FACTORY(v0, Tan);
REGISTER_FACTORY(v0, Tanh);
REGISTER_FACTORY(v0, TensorIterator);
REGISTER_FACTORY(v0, Tile);
REGISTER_FACTORY(v0, Unsqueeze);
@ -88,7 +89,6 @@ REGISTER_FACTORY(v0, Unsqueeze);
// REGISTER_FACTORY(v0, Range);
// REGISTER_FACTORY(v0, RNNCell);
// REGISTER_FACTORY(v0, ShapeOf);
REGISTER_FACTORY(v0, TensorIterator);
// ------------------------------ Supported v1 ops ------------------------------ //
REGISTER_FACTORY(v1, Add);
@ -156,6 +156,7 @@ REGISTER_FACTORY(v3, EmbeddingBagOffsetsSum);
REGISTER_FACTORY(v3, EmbeddingBagPackedSum);
REGISTER_FACTORY(v3, EmbeddingSegmentsSum);
REGISTER_FACTORY(v3, ExtractImagePatches);
REGISTER_FACTORY(v3, ROIAlign);
REGISTER_FACTORY(v3, ScatterUpdate);
REGISTER_FACTORY(v3, ScatterElementsUpdate);
REGISTER_FACTORY(v3, ScatterNDUpdate);
@ -166,7 +167,6 @@ REGISTER_FACTORY(v3, ScatterNDUpdate);
// REGISTER_FACTORY(v3, Bucketize);
// REGISTER_FACTORY(v3, GRUCell);
// REGISTER_FACTORY(v3, NonZero);
REGISTER_FACTORY(v3, ROIAlign);
// REGISTER_FACTORY(v3, ReadValue);
// REGISTER_FACTORY(v3, ShapeOf);
// REGISTER_FACTORY(v3, TopK);

View File

@ -15,7 +15,7 @@
#include <ngraph/ngraph.hpp>
#include <ngraph/compatibility.hpp>
#include "cldnn_config.h"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/graph/topology.hpp"
@ -40,7 +40,9 @@ void __register ## _ ## op_name ## _ ## op_version() {
}); \
}
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
std::string layer_type_lower(const ngraph::Node* op);
std::string layer_type_name_ID(const ngraph::Node* op);
@ -135,7 +137,7 @@ public:
template<typename PType>
void AddPrimitive(PType prim) {
if (m_topology == nullptr) {
IE_THROW() << "m_topology object was not created in clDNNPlugin::Program";
IE_THROW() << "m_topology object was not created in ov::runtime::intel_gpu::Program";
}
m_topology->add(prim);
@ -172,11 +174,13 @@ private:
void ChangeInputBatch(int batch);
};
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& node, CLDNNCustomLayerPtr customLayer);
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& node, CustomLayerPtr customLayer);
void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node,
cldnn::activation_func func, cldnn::activation_additional_params params);
void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node, cldnn::eltwise_mode mode);
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -6,12 +6,13 @@
#include "intel_gpu/runtime/memory.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include <ie_parameter.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <blob_factory.hpp>
#include <ie_remote_context.hpp>
#include "cldnn_config.h"
#include "cldnn_common_utils.h"
#ifndef NOMINMAX
# define NOMINMAX
@ -28,11 +29,13 @@
#include <memory>
#include <atomic>
namespace CLDNNPlugin {
class CLDNNRemoteAllocator;
namespace ov {
namespace runtime {
namespace intel_gpu {
class RemoteAllocator;
class CLDNNRemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
friend class CLDNNRemoteAllocator;
class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
friend class RemoteAllocator;
public:
enum BlobType {
BT_EMPTY,
@ -46,13 +49,13 @@ public:
BT_DX_BUF_SHARED,
};
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
BlobType mem_type = BT_BUF_INTERNAL);
explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
BlobType mem_type = BT_BUF_INTERNAL);
void allocate();
bool deallocate() noexcept;
@ -72,7 +75,7 @@ public:
cldnn::memory::ptr getMemory() { return m_memObject; }
protected:
static CLDNNRemoteAllocator m_allocator;
static RemoteAllocator m_allocator;
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
cldnn::stream& m_stream;
@ -95,18 +98,18 @@ protected:
};
template<typename TpublicAPI>
class typedCLDNNRemoteBlob : public TpublicAPI {
class TypedRemoteBlob : public TpublicAPI {
public:
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
using Ptr = std::shared_ptr<TypedRemoteBlob>;
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
CLDNNRemoteBlobImpl::BlobType mem_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
cldnn::stream& stream,
const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem = nullptr,
cldnn::shared_surface surf = 0,
uint32_t plane = 0,
RemoteBlobImpl::BlobType mem_type = RemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
: _impl(context, stream, layout, mem, surf, plane, mem_type)
, TpublicAPI(desc) {}
@ -124,62 +127,62 @@ public:
InferenceEngine::LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
InferenceEngine::LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
InferenceEngine::LockedMemory<void> wmap()noexcept override { return _impl.wmap(); }
CLDNNRemoteBlobImpl* getImpl() { return &_impl; }
RemoteBlobImpl* getImpl() { return &_impl; }
protected:
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
void *getHandle() const noexcept override { return _impl.getHandle(); }
CLDNNRemoteBlobImpl _impl;
RemoteBlobImpl _impl;
};
using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
using CLDNNRemoteUSMbuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::USMBlob>;
using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
using RemoteCLbuffer = TypedRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
using RemoteUSMbuffer = TypedRemoteBlob<InferenceEngine::gpu::USMBlob>;
using RemoteCLImage2D = TypedRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
#ifdef _WIN32
using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
using RemoteD3DBuffer = TypedRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
using RemoteD3DSurface = TypedRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
#else
using CLDNNRemoteVASurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
using RemoteVASurface = TypedRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
#endif
inline CLDNNRemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
#ifdef _WIN32
{
auto ptr = blobPtr->as<CLDNNRemoteD3DSurface>();
auto ptr = blobPtr->as<RemoteD3DSurface>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<CLDNNRemoteD3DBuffer>();
auto ptr = blobPtr->as<RemoteD3DBuffer>();
if (ptr) return ptr->getImpl();
}
#else
{
auto ptr = blobPtr->as<CLDNNRemoteVASurface>();
auto ptr = blobPtr->as<RemoteVASurface>();
if (ptr) return ptr->getImpl();
}
#endif
{
auto ptr = blobPtr->as<CLDNNRemoteCLbuffer>();
auto ptr = blobPtr->as<RemoteCLbuffer>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<CLDNNRemoteCLImage2D>();
auto ptr = blobPtr->as<RemoteCLImage2D>();
if (ptr) return ptr->getImpl();
}
{
auto ptr = blobPtr->as<CLDNNRemoteUSMbuffer>();
auto ptr = blobPtr->as<RemoteUSMbuffer>();
if (ptr) return ptr->getImpl();
}
return nullptr;
}
class CLDNNRemoteAllocator : public InferenceEngine::IAllocator {
class RemoteAllocator : public InferenceEngine::IAllocator {
protected:
friend class CLDNNRemoteBlobImpl;
friend class RemoteBlobImpl;
std::atomic_flag _lock;
std::map<void*, const CLDNNRemoteBlobImpl*> m_lockedBlobs;
std::map<void*, const RemoteBlobImpl*> m_lockedBlobs;
void regLockedBlob(void* handle, const CLDNNRemoteBlobImpl* blob);
void regLockedBlob(void* handle, const RemoteBlobImpl* blob);
void acquire_lock() {
while (_lock.test_and_set(std::memory_order_acquire)) {}
@ -190,9 +193,9 @@ protected:
}
public:
using Ptr = std::shared_ptr<CLDNNRemoteAllocator>;
using Ptr = std::shared_ptr<RemoteAllocator>;
CLDNNRemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
RemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
/**
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @return Generic pointer to memory
@ -269,19 +272,19 @@ public:
};
class CLDNNExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
class ExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
public:
enum ContextType {
OCL,
DEV_SHARED
};
using Ptr = std::shared_ptr<CLDNNExecutionContextImpl>;
using CPtr = std::shared_ptr<const CLDNNExecutionContextImpl>;
using Ptr = std::shared_ptr<ExecutionContextImpl>;
using CPtr = std::shared_ptr<const ExecutionContextImpl>;
explicit CLDNNExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {});
explicit ExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {});
InferenceEngine::ParamMap getParams() const;
std::string getDeviceName() const noexcept;
@ -313,7 +316,7 @@ protected:
};
template<typename TpublicContextAPI>
class typedCLDNNExecutionContext : public TpublicContextAPI {
class TypedExecutionContext : public TpublicContextAPI {
template<typename T1, typename T2>
struct _Key {
T1 _surf;
@ -357,17 +360,17 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
// unluckily, not found - create new and insert into registry
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
ImageFormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
tensor_from_dims(tensorDesc.getDims()));
auto smart_this =
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
#ifdef _WIN32
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this, stream,
ret = std::make_shared<RemoteD3DSurface>(smart_this, stream,
tensorDesc, layout, mem, 0, plane,
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
RemoteBlobImpl::BlobType::BT_SURF_SHARED);
#else
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this, stream,
ret = std::make_shared<RemoteVASurface>(smart_this, stream,
tensorDesc, layout, nullptr, surf, plane,
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
RemoteBlobImpl::BlobType::BT_SURF_SHARED);
#endif
shared_surf_reg[skey] = ret;
}
@ -378,7 +381,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc,
cldnn::shared_handle mem,
CLDNNRemoteBlobImpl::BlobType blob_type) {
RemoteBlobImpl::BlobType blob_type) {
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
_impl.acquire_lock();
@ -392,24 +395,24 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
// unluckily, not found - create new and insert into registry
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
tensor_from_dims(tensorDesc.getDims()));
auto smart_this =
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
switch (blob_type) {
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
case RemoteBlobImpl::BlobType::BT_BUF_SHARED:
ret = std::make_shared<RemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
case CLDNNRemoteBlobImpl::BlobType::BT_USM_SHARED:
ret = std::make_shared<CLDNNRemoteUSMbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
case RemoteBlobImpl::BlobType::BT_USM_SHARED:
ret = std::make_shared<RemoteUSMbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
case RemoteBlobImpl::BlobType::BT_IMG_SHARED:
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<RemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#ifdef _WIN32
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
case RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
ret = std::make_shared<RemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#endif
default:
@ -425,44 +428,44 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) {
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
tensor_from_dims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
auto& stream = _impl.GetEngine()->get_program_stream();
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,
CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
return std::make_shared<RemoteCLbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,
RemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
}
InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, CLDNNRemoteBlobImpl::BlobType alloc_type) {
InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, RemoteBlobImpl::BlobType alloc_type) {
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
tensor_from_dims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
auto& stream = _impl.GetEngine()->get_program_stream();
return std::make_shared<CLDNNRemoteUSMbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,
alloc_type);
return std::make_shared<RemoteUSMbuffer>(smart_this,
stream,
tensorDesc,
layout,
nullptr, 0, 0,
alloc_type);
}
void check_if_shared() {
if (GetType() != CLDNNExecutionContextImpl::ContextType::DEV_SHARED)
if (GetType() != ExecutionContextImpl::ContextType::DEV_SHARED)
IE_THROW() << "Shared context is required to to share this type of memory";
}
public:
using Ptr = std::shared_ptr<typedCLDNNExecutionContext>;
using CPtr = std::shared_ptr<const typedCLDNNExecutionContext>;
using Ptr = std::shared_ptr<TypedExecutionContext>;
using CPtr = std::shared_ptr<const TypedExecutionContext>;
explicit typedCLDNNExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {})
explicit TypedExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {})
: _impl(plugin, params, config) {}
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
@ -479,7 +482,7 @@ public:
using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter;
if (params.empty()) {
// user wants clDNN to allocate blob by itself and return handle
// user wants plugin to allocate blob by itself and return handle
return create_buffer(tensorDesc);
} else {
// user will supply shared object handle
@ -497,25 +500,25 @@ public:
check_if_shared();
return reuse_surf(tensorDesc, params);
} else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == memTypeStr) {
return create_usm(tensorDesc, CLDNNRemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
} else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == memTypeStr) {
return create_usm(tensorDesc, CLDNNRemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
} else {
CLDNNRemoteBlobImpl::BlobType blob_type;
RemoteBlobImpl::BlobType blob_type;
cldnn::shared_handle mem = nullptr;
if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED;
blob_type = RemoteBlobImpl::BlobType::BT_BUF_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_USM_SHARED;
blob_type = RemoteBlobImpl::BlobType::BT_USM_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED;
blob_type = RemoteBlobImpl::BlobType::BT_IMG_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
#ifdef _WIN32
} else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
blob_type = RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
check_if_shared();
#endif
@ -529,38 +532,40 @@ public:
}
Config& GetConfig() { return _impl.GetConfig(); }
CLDNNExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
ExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
CLDNNExecutionContextImpl* getImpl() { return &_impl; }
ExecutionContextImpl* getImpl() { return &_impl; }
protected:
CLDNNExecutionContextImpl _impl;
ExecutionContextImpl _impl;
};
using CLDNNRemoteCLContext = typedCLDNNExecutionContext<InferenceEngine::gpu::ClContext>;
using RemoteCLContext = TypedExecutionContext<InferenceEngine::gpu::ClContext>;
#ifdef _WIN32
using CLDNNRemoteD3DContext = typedCLDNNExecutionContext<InferenceEngine::gpu::D3DContext>;
using RemoteD3DContext = TypedExecutionContext<InferenceEngine::gpu::D3DContext>;
#else
using CLDNNRemoteVAContext = typedCLDNNExecutionContext<InferenceEngine::gpu::VAContext>;
using RemoteVAContext = TypedExecutionContext<InferenceEngine::gpu::VAContext>;
#endif
inline CLDNNExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
inline ExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
#ifdef _WIN32
{
auto ptr = ctxPtr->as<CLDNNRemoteD3DContext>();
auto ptr = ctxPtr->as<RemoteD3DContext>();
if (ptr) return ptr->getImpl();
}
#else
{
auto ptr = ctxPtr->as<CLDNNRemoteVAContext>();
auto ptr = ctxPtr->as<RemoteVAContext>();
if (ptr) return ptr->getImpl();
}
#endif
{
auto ptr = ctxPtr->as<CLDNNRemoteCLContext>();
auto ptr = ctxPtr->as<RemoteCLContext>();
if (ptr) return ptr->getImpl();
}
return nullptr;
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
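To see how the renamed blob types are selected in practice, a hedged caller-side sketch (parameter keys come from the public GPU remote-context API; remote_context and tensorDesc are assumed to exist):

// Request a USM host buffer; CreateBlob dispatches to create_usm(..., BT_USM_HOST_INTERNAL) above
InferenceEngine::ParamMap params = {
    { GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(USM_HOST_BUFFER) }
};
auto blob = remote_context->CreateBlob(tensorDesc, params);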

View File

@ -8,9 +8,11 @@
#include <ngraph/function.hpp>
#include "cldnn_config.h"
#include "intel_gpu/plugin/device_config.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
class TransformationsPipeline {
public:
@ -23,4 +25,6 @@ private:
cldnn::device_info device_info;
};
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,21 +2,25 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_async_infer_request.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/async_infer_request.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include <memory>
CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const CLDNNInferRequest::Ptr &inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
namespace ov {
namespace runtime {
namespace intel_gpu {
AsyncInferRequest::AsyncInferRequest(const InferRequest::Ptr &inferRequest,
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
: AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) {
_pipeline = {};
if (!_inferRequest->use_external_queue()) {
_pipeline.push_back({taskExecutor,
[this] {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::PreprocessingAndStartPipeline");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline");
_inferRequest->setup_stream_graph();
_inferRequest->preprocess();
_inferRequest->enqueue();
@ -25,13 +29,13 @@ CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const CLDNNInferRequ
} else {
_pipeline.push_back({ _waitExecutor,
[this] {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::WaitPipeline");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline");
_inferRequest->wait_notify();
} });
}
}
void CLDNNPlugin::CLDNNAsyncInferRequest::Infer_ThreadUnsafe() {
void AsyncInferRequest::Infer_ThreadUnsafe() {
if (_inferRequest->use_external_queue()) {
_inferRequest->setup_stream_graph();
_inferRequest->preprocess_notify();
@ -40,7 +44,7 @@ void CLDNNPlugin::CLDNNAsyncInferRequest::Infer_ThreadUnsafe() {
Parent::Infer_ThreadUnsafe();
}
void CLDNNPlugin::CLDNNAsyncInferRequest::StartAsync_ThreadUnsafe() {
void AsyncInferRequest::StartAsync_ThreadUnsafe() {
if (_inferRequest->use_external_queue()) {
_inferRequest->setup_stream_graph();
_inferRequest->preprocess_notify();
@ -49,6 +53,10 @@ void CLDNNPlugin::CLDNNAsyncInferRequest::StartAsync_ThreadUnsafe() {
Parent::StartAsync_ThreadUnsafe();
}
CLDNNPlugin::CLDNNAsyncInferRequest::~CLDNNAsyncInferRequest() {
AsyncInferRequest::~AsyncInferRequest() {
StopAndWait();
}
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,48 +2,44 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ie_metric_helpers.hpp"
#include <chrono>
#include <cmath>
#include <algorithm>
#include "ie_metric_helpers.hpp"
#include <chrono>
#include <cmath>
#include <algorithm>
#include "cldnn_graph.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/graph.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/plugin/infer_request.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/async_infer_request.hpp"
#include <description_buffer.hpp>
#include "cldnn_infer_request.h"
#include <threading/ie_executor_manager.hpp>
#include "cldnn_async_infer_request.h"
#include <fstream>
#include <utility>
#include <sys/types.h>
#include "cldnn_executable_network.h"
#include "threading/ie_cpu_streams_executor.hpp"
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "ie_icore.hpp"
#include <fstream>
#include <utility>
#include <sys/types.h>
#include <chrono>
#include <cmath>
#include <algorithm>
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::shared_ptr<RemoteContext> context, Config config) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]()->InferenceEngine::ITaskExecutor::Ptr {
CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config) :
InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
if (config.exclusiveAsyncRequests) {
//exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
return ExecutorManager::getInstance()->getExecutor("GPU");
} else if (config.throughput_streams > 1) {
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
IStreamsExecutor::Config{"CLDNNPlugin executor", config.throughput_streams});
IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams});
} else {
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
IStreamsExecutor::Config{"CLDNNPlugin executor", 1});
IStreamsExecutor::Config{"Intel GPU plugin executor", 1});
}
}()},
m_config(config),
@ -57,18 +53,18 @@ CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::sh
m_context = casted_context;
auto graph_base = std::make_shared<CLDNNGraph>(network, m_context, m_config, 0);
auto graph_base = std::make_shared<Graph>(network, m_context, m_config, 0);
for (uint16_t n = 0; n < m_config.throughput_streams; n++) {
auto graph = n == 0 ? graph_base : std::make_shared<CLDNNGraph>(graph_base, n);
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
m_graphs.push_back(graph);
}
}
IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
auto ptr = std::make_shared<CLDNNInferRequest>(networkInputs, networkOutputs,
std::static_pointer_cast<CLDNNExecNetwork>(shared_from_this()));
IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
auto ptr = std::make_shared<InferRequest>(networkInputs, networkOutputs,
std::static_pointer_cast<CompiledModel>(shared_from_this()));
if (m_config.throughput_streams > 1) {
ptr->EnableStreams();
}
@ -82,11 +78,11 @@ IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMa
return ptr;
}
IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
auto ptr = std::make_shared<CLDNNInferRequest>(inputs, outputs,
std::static_pointer_cast<CLDNNExecNetwork>(shared_from_this()));
IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
auto ptr = std::make_shared<InferRequest>(inputs, outputs,
std::static_pointer_cast<CompiledModel>(shared_from_this()));
if (m_config.throughput_streams > 1) {
ptr->EnableStreams();
}
@ -101,8 +97,8 @@ IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(const std::v
return ptr;
}
IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequest() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequest");
IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequest");
InferenceEngine::IInferRequestInternal::Ptr internalRequest;
if (m_graphs.empty()) {
IE_THROW(NetworkNotLoaded);
@ -123,20 +119,20 @@ IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequest() {
if (!internalRequest)
internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
internalRequest->setPointerToExecutableNetworkInternal(shared_from_this());
return std::make_shared<CLDNNAsyncInferRequest>(std::static_pointer_cast<CLDNNInferRequest>(internalRequest),
m_taskExecutor,
m_waitExecutor,
_callbackExecutor);
return std::make_shared<AsyncInferRequest>(std::static_pointer_cast<InferRequest>(internalRequest),
m_taskExecutor,
m_waitExecutor,
_callbackExecutor);
}
std::shared_ptr<ngraph::Function> CLDNNExecNetwork::GetExecGraphInfo() {
std::shared_ptr<ngraph::Function> CompiledModel::GetExecGraphInfo() {
if (m_graphs.empty())
IE_THROW(NetworkNotLoaded);
return m_graphs.front()->GetExecGraphInfo();
}
InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name) const {
InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) const {
auto it = m_config.key_config_map.find(name);
if (it != m_config.key_config_map.end()) {
return it->second;
@ -145,7 +141,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name)
}
}
InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) const {
InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) const {
if (name == METRIC_KEY(NETWORK_NAME)) {
IE_ASSERT(!m_graphs.empty());
IE_SET_METRIC_RETURN(NETWORK_NAME, m_graphs[0]->getName());
@ -171,8 +167,10 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
}
}
std::shared_ptr<RemoteContext> CLDNNExecNetwork::GetContext() const {
std::shared_ptr<InferenceEngine::RemoteContext> CompiledModel::GetContext() const {
return m_context;
}
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,7 +2,10 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_custom_layer.h"
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/plugin/simple_math.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "xml_parse_utils.h"
#include <description_buffer.hpp>
#include <map>
@ -14,9 +17,6 @@
# include <windows.h>
#endif
#include "simple_math.h"
#include "cldnn_itt.h"
using namespace InferenceEngine;
using namespace XMLParseUtils;
@ -29,9 +29,11 @@ using namespace XMLParseUtils;
#define CheckIntAttrAndReturnError(node, attr, value) \
CheckAndReturnError(GetIntAttr(node, attr, -1) != (value), "Wrong attribute value! expected: " << value << " found: " << GetIntAttr(node, attr, -1))
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
void CLDNNCustomLayer::LoadSingleLayer(const pugi::xml_node & node) {
void CustomLayer::LoadSingleLayer(const pugi::xml_node & node) {
// Root checks
CheckNodeTypeAndReturnError(node, "CustomLayer");
CheckStrAttrAndReturnError(node, "type", "SimpleGPU");
@ -46,7 +48,7 @@ void CLDNNCustomLayer::LoadSingleLayer(const pugi::xml_node & node) {
ProcessWorkSizesNode(node.child("WorkSizes"));
}
void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) {
void CustomLayer::ProcessKernelNode(const pugi::xml_node & node) {
CheckNodeTypeAndReturnError(node, "Kernel");
CheckAndReturnError(m_kernelSource.length() > 0, "Multiple definition of Kernel");
m_kernelEntry = GetStrAttr(node, "entry", "");
@ -89,7 +91,7 @@ void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) {
}
}
void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) {
void CustomLayer::ProcessBuffersNode(const pugi::xml_node & node) {
CheckNodeTypeAndReturnError(node, "Buffers");
FOREACH_CHILD(tensorNode, node, "Tensor") {
KerenlParam kp;
@ -120,7 +122,7 @@ void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) {
}
}
void CLDNNCustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) {
void CustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) {
if (node.empty()) {
return; // Optional node doesn't exist
}
@ -129,7 +131,7 @@ void CLDNNCustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) {
m_compilerOptions = GetStrAttr(node, "options", "");
}
void CLDNNCustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) {
void CustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) {
if (node.empty()) {
return; // Optional node doesn't exist
}
@ -180,7 +182,7 @@ void CLDNNCustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) {
}
}
bool CLDNNCustomLayer::IsLegalSizeRule(const std::string & rule) {
bool CustomLayer::IsLegalSizeRule(const std::string & rule) {
SimpleMathExpression expr;
expr.SetVariables({
{ 'b', 1 }, { 'B', 1 },
@ -200,7 +202,7 @@ bool CLDNNCustomLayer::IsLegalSizeRule(const std::string & rule) {
return true;
}
cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) {
cldnn::format CustomLayer::FormatFromString(const std::string & str) {
static const std::map<std::string, cldnn::format> FormatNameToType = {
{ "BFYX" , cldnn::format::bfyx },
{ "bfyx" , cldnn::format::bfyx },
@ -224,8 +226,8 @@ cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) {
return cldnn::format::format_num;
}
void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLayerMap& customLayers, bool can_be_missed) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNCustomLayer::LoadFromFile");
void CustomLayer::LoadFromFile(const std::string configFile, CustomLayerMap& customLayers, bool can_be_missed) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CustomLayer::LoadFromFile");
pugi::xml_document xmlDoc;
pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str());
if (res.status != pugi::status_ok) {
@ -267,7 +269,7 @@ void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLay
}
for (auto r = xmlDoc.document_element(); r; r = r.next_sibling()) {
CLDNNCustomLayerPtr layer = std::make_shared<CLDNNCustomLayer>(CLDNNCustomLayer(dir_path));
CustomLayerPtr layer = std::make_shared<CustomLayer>(CustomLayer(dir_path));
layer->LoadSingleLayer(r);
if (layer->Error()) {
customLayers.clear();
@ -278,4 +280,6 @@ void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLay
}
}
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -6,11 +6,11 @@
#include <cldnn/cldnn_config.hpp>
#include <gpu/gpu_config.hpp>
#include "cldnn_config.h"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "ie_api.h"
#include "file_utils.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include <ie_system_conf.h>
#include <thread>
@ -25,7 +25,9 @@
using namespace InferenceEngine;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void createDirectory(std::string _path) {
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
@ -57,7 +59,7 @@ static int getNumberOfCores(const IStreamsExecutor::Config::PreferredCoreType co
IE_SUPPRESS_DEPRECATED_START
void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::UpdateFromMap");
for (auto& kvp : configMap) {
std::string key = kvp.first;
std::string val = kvp.second;
@ -199,7 +201,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
std::istream_iterator<std::string> end;
std::vector<std::string> configFiles(begin, end);
for (auto& file : configFiles) {
CLDNNCustomLayer::LoadFromFile(file, customLayers);
CustomLayer::LoadFromFile(file, customLayers);
}
} else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) {
if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) {
@ -329,7 +331,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
}
void Config::adjustKeyMapValues() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::AdjustKeyMapValues");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::AdjustKeyMapValues");
if (useProfiling)
key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
else
@ -465,4 +467,6 @@ Config& Configs::GetDefaultDeviceConfig() {
IE_SUPPRESS_DEPRECATED_END
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -6,10 +6,11 @@
#include "intel_gpu/runtime/profiling.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "cldnn_graph.h"
#include "simple_math.h"
#include "intel_gpu/plugin/graph.hpp"
#include "intel_gpu/plugin/simple_math.hpp"
#include <cldnn/cldnn_config.hpp>
#include "cldnn_infer_request.h"
#include "intel_gpu/plugin/infer_request.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include <description_buffer.hpp>
#include <threading/ie_executor_manager.hpp>
@ -33,14 +34,15 @@
#include <ie_ngraph_utils.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/ngraph.hpp>
#include "cldnn_itt.h"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
: m_context(context)
, m_networkName(network.getName())
, m_config(config)
@ -50,7 +52,7 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr
Build();
}
CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
: m_context(graph->m_context)
, m_program(graph->m_program)
, m_networkName(graph->m_networkName)
@ -60,8 +62,8 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
Build();
}
void CLDNNGraph::UpdateLayersMaps() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps");
void Graph::UpdateLayersMaps() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdateLayersMaps");
primitiveIDs = m_program->primitiveIDs;
prevPrimitiveIDs = m_program->prevPrimitiveIDs;
profilingIDs = m_program->profilingIDs;
@ -69,8 +71,8 @@ void CLDNNGraph::UpdateLayersMaps() {
outputDims = m_program->outputDims;
}
void CLDNNGraph::Build() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::Build");
void Graph::Build() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::Build");
UpdateLayersMaps();
if (GetMaxDynamicBatchSize() > 1) {
@ -92,13 +94,13 @@ void CLDNNGraph::Build() {
}
}
bool CLDNNGraph::use_external_queue() const {
bool Graph::use_external_queue() const {
auto impl = getContextImpl(m_context);
return impl->GetExternalQueue() != nullptr;
}
std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork");
std::shared_ptr<cldnn::network> Graph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::BuildNetwork");
std::shared_ptr<cldnn::network> network = nullptr;
auto impl = getContextImpl(m_context);
@ -129,9 +131,9 @@ std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::
return network;
}
std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetExecGraphInfoByPrimitivesInfo");
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo");
if (m_config.useProfiling) {
try {
// Update may throw an exception for step-by-step runtime graph dump,
@ -289,7 +291,7 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
auto desc_from_layout = [&](cldnn::layout layout) -> TensorDesc {
Precision precision = data_type_to_precision(layout.data_type);
SizeVector dims;
Layout l = Layout::NCHW;
auto l = InferenceEngine::Layout::NCHW;
auto size = layout.size;
if (layout.format.dimension() == 4) {
dims = {static_cast<size_t>(size.batch[0]),
@ -302,7 +304,7 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
static_cast<size_t>(size.spatial[2]),
static_cast<size_t>(size.spatial[1]),
static_cast<size_t>(size.spatial[0])};
l = Layout::NCDHW;
l = InferenceEngine::Layout::NCDHW;
} else if (layout.format.dimension() == 6) {
dims = {static_cast<size_t>(size.batch[0]),
static_cast<size_t>(size.feature[0]),
@ -311,7 +313,7 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
static_cast<size_t>(size.spatial[1]),
static_cast<size_t>(size.spatial[0])};
// Should be NC?DHW but there is no such layout yet
l = Layout::BLOCKED;
l = InferenceEngine::Layout::BLOCKED;
}
TensorDesc dst{precision, dims, l};
return dst;
@ -465,14 +467,14 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
return std::make_shared<ngraph::Function>(results, params, "runtime_gpu_graph");
}
std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfo() {
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfo() {
auto primitives_info = GetNetwork()->get_primitives_info();
return GetExecGraphInfoByPrimitivesInfo(primitives_info, true);
}
void CLDNNGraph::UpdatePerfStatistics() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdatePerfStatistics");
void Graph::UpdatePerfStatistics() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdatePerfStatistics");
if (GetNetworksCount() == 0) {
return;
}
@ -539,12 +541,12 @@ void CLDNNGraph::UpdatePerfStatistics() {
}
}
bool CLDNNGraph::IsLoaded() const {
bool Graph::IsLoaded() const {
return GetNetwork() != nullptr;
}
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetPerformanceCounts");
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> Graph::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetPerformanceCounts");
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> result;
bool combinePrimByIRLayers = false;
unsigned i = 0;
@ -624,7 +626,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
return true;
};
// Step 1. Get all primitives in execution order which was added by clDNNPlugin
// Step 1. Get all primitives in execution order which was added by GPU plugin
for (auto& primId : profilingIDs) {
getFromProfiling(primId);
}
@ -693,7 +695,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
}
}
// Step 3. Checking primitives which has been deleted from execution order but added by clDNNPlugin
// Step 3. Checking primitives which has been deleted from execution order but added by GPU plugin
for (auto& primId : profilingIDs) {
if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
getFromProfiling(primId);
@ -718,7 +720,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
return result;
}
std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
std::shared_ptr<cldnn::network> Graph::GetNetwork(size_t idx) const {
if (idx >= GetNetworksCount())
IE_THROW() << "Unable to find network with id=" << idx << ". Stored networks count: " << GetNetworksCount();
@ -726,7 +728,7 @@ std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
}
std::string CLDNNGraph::MapOutputName(std::string outName) const {
std::string Graph::MapOutputName(std::string outName) const {
auto networkOutputsIDs = GetNetwork()->get_output_ids();
auto allPrimitiveIds = GetNetwork()->get_all_primitives();
@ -751,7 +753,7 @@ std::string CLDNNGraph::MapOutputName(std::string outName) const {
return outputID;
}
InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const {
InferenceEngine::SizeVector Graph::GetOutputSize(std::string outName) const {
auto res_output = outputDims.find(outName);
InferenceEngine::SizeVector sz;
@ -763,4 +765,6 @@ InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const
return sz;
}
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
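
The desc_from_layout lambda above rebuilds IE-style dims from a GPU layout's batch/feature/spatial fields, picking NCHW, NCDHW, or BLOCKED depending on rank. A standalone model of that dimension reordering, assuming the spatial fields are stored innermost-first as the code above suggests; FakeGpuLayout is a stand-in, not the real cldnn::layout type.

#include <cstddef>
#include <vector>

// Stand-in for the fields desc_from_layout reads from a cldnn::layout.
struct FakeGpuLayout {
    std::vector<std::size_t> batch;    // e.g. {N}
    std::vector<std::size_t> feature;  // e.g. {C}
    std::vector<std::size_t> spatial;  // innermost-first: {W, H[, D]}
};

// Rebuild IE-style dims (outermost-first), mirroring the 4D/5D branches above.
std::vector<std::size_t> dims_from_layout(const FakeGpuLayout& l) {
    std::vector<std::size_t> dims{l.batch[0], l.feature[0]};
    for (auto it = l.spatial.rbegin(); it != l.spatial.rend(); ++it)
        dims.push_back(*it);  // spatial dims are reversed: (D,) H, W order for IE
    return dims;
}

int main() {
    FakeGpuLayout l{{1}, {3}, {224, 128}};   // N=1, C=3, W=224, H=128
    auto dims = dims_from_layout(l);          // -> {1, 3, 128, 224} as NCHW
    return dims.size() == 4 ? 0 : 1;
}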

View File

@ -8,10 +8,10 @@
#include <functional>
#include <utility>
#include <description_buffer.hpp>
#include "cldnn_infer_request.h"
#include "cldnn_remote_context.h"
#include "cldnn_executable_network.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/infer_request.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include <ie_algorithm.hpp>
#include <debug.h>
@ -47,7 +47,7 @@ void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
}
template<typename T>
void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, CLDNNPlugin::buf_info* bi, cldnn::stream& stream) {
void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, ov::runtime::intel_gpu::buf_info* bi, cldnn::stream& stream) {
size_t n = (bi == nullptr) ? dst->size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
@ -89,7 +89,7 @@ inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) {
if (!blob->is<gpu::ClBlob>()) {
not_allocated = (blob->buffer() == nullptr);
} else {
not_allocated = !CLDNNPlugin::getBlobImpl(blob->as<gpu::ClBlob>())->is_allocated();
not_allocated = !ov::runtime::intel_gpu::getBlobImpl(blob->as<gpu::ClBlob>())->is_allocated();
}
if (not_allocated) {
IE_THROW(NotAllocated) << err_str;
@ -173,14 +173,16 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) {
} // namespace
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
// ----------------------------------------------------------------------------------------- //
// ---------------------------- IE API impl ------------------------------------------------ //
// ----------------------------------------------------------------------------------------- //
Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob");
Blob::Ptr InferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetBlob");
Blob::Ptr data;
InputInfo::Ptr foundInput;
DataPtr foundOutput;
@ -202,8 +204,8 @@ Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) {
return data;
}
void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob");
void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBlob");
// perform all common checks first
if (name.empty()) {
@ -339,8 +341,8 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data)
}
}
void CLDNNInferRequest::checkBlobs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs");
void InferRequest::checkBlobs() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::checkBlobs");
for (auto const &input : _inputs) {
InputInfo::Ptr foundInput = nullptr;
auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs),
@ -369,8 +371,8 @@ void CLDNNInferRequest::checkBlobs() {
}
}
void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetGraph");
void InferRequest::SetGraph(std::shared_ptr<Graph> graph) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetGraph");
m_graph = graph;
if (m_graph == nullptr) {
@ -387,8 +389,8 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph)
}
}
void CLDNNInferRequest::SetBatch(int new_batch) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBatch");
void InferRequest::SetBatch(int new_batch) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBatch");
if (m_graph->GetMaxDynamicBatchSize() < 0)
IE_THROW() << "Dynamic batch is not enabled.";
@ -456,16 +458,16 @@ void CLDNNInferRequest::SetBatch(int new_batch) {
m_curBatch = new_batch;
}
CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs,
const CLDNNExecNetwork::Ptr& execNetwork)
InferRequest::InferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs,
const CompiledModel::Ptr& execNetwork)
: IInferRequestInternal(networkInputs, networkOutputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
}
CLDNNInferRequest::CLDNNInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
InferRequest::InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const CLDNNExecNetwork::Ptr& execNetwork)
const CompiledModel::Ptr& execNetwork)
: IInferRequestInternal(inputs, outputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
@ -474,17 +476,17 @@ CLDNNInferRequest::CLDNNInferRequest(const std::vector<std::shared_ptr<const ov:
// ----------------------------------------------------------------------------------------- //
// ---------------------------- internal pipeline stages ----------------------------------- //
// ----------------------------------------------------------------------------------------- //
void CLDNNInferRequest::preprocess_notify() {
m_graph->wait(CLDNNGraph::Stage::PREPROC);
void InferRequest::preprocess_notify() {
m_graph->wait(Graph::Stage::PREPROC);
if (m_graph->GetMaxDynamicBatchSize() > 1) {
preprocess_dynamic();
} else {
execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP
}
m_graph->notify(CLDNNGraph::Stage::PREPROC);
m_graph->notify(Graph::Stage::PREPROC);
}
void CLDNNInferRequest::preprocess() {
void InferRequest::preprocess() {
if (m_graph->GetMaxDynamicBatchSize() > 1) {
preprocess_dynamic();
} else {
@ -492,12 +494,12 @@ void CLDNNInferRequest::preprocess() {
}
}
void CLDNNInferRequest::enqueue_notify() {
m_graph->wait(CLDNNGraph::Stage::EXECUTE);
void InferRequest::enqueue_notify() {
m_graph->wait(Graph::Stage::EXECUTE);
enqueue();
}
void CLDNNInferRequest::enqueue() {
void InferRequest::enqueue() {
if (m_graph->GetMaxDynamicBatchSize() > 1) {
enqueue_dynamic();
return;
@ -546,12 +548,12 @@ void CLDNNInferRequest::enqueue() {
internal_outputs = m_graph->GetNetwork()->execute(dependencies);
}
void CLDNNInferRequest::wait_notify() {
void InferRequest::wait_notify() {
wait();
m_graph->notify(CLDNNGraph::Stage::EXECUTE);
m_graph->notify(Graph::Stage::EXECUTE);
}
void CLDNNInferRequest::wait() {
void InferRequest::wait() {
if (m_graph->GetMaxDynamicBatchSize() > 1) {
wait_dynamic();
return;
@ -588,12 +590,12 @@ void CLDNNInferRequest::wait() {
}
}
void CLDNNInferRequest::preprocess_dynamic() {
void InferRequest::preprocess_dynamic() {
// execute input pre-processing.
execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP
}
void CLDNNInferRequest::enqueue_dynamic() {
void InferRequest::enqueue_dynamic() {
internal_outputs_dynamic.clear();
auto numNets = m_graph->GetNetworksCount();
internal_outputs_dynamic.resize(numNets);
@ -616,7 +618,7 @@ void CLDNNInferRequest::enqueue_dynamic() {
}
}
void CLDNNInferRequest::wait_dynamic() {
void InferRequest::wait_dynamic() {
if (internal_outputs_dynamic.empty()) {
IE_THROW() << "Inference was not started!\n";
}
@ -640,9 +642,9 @@ void CLDNNInferRequest::wait_dynamic() {
// ----------------------------------------------------------------------------------------- //
// ---------------------------- internal utils --------- ----------------------------------- //
// ----------------------------------------------------------------------------------------- //
void CLDNNInferRequest::setup_stream_graph() {
void InferRequest::setup_stream_graph() {
int streamID = 0;
auto& streamGraphs = static_cast<CLDNNExecNetwork*>(_exeNetwork.get())->m_graphs;
auto& streamGraphs = static_cast<CompiledModel*>(_exeNetwork.get())->m_graphs;
if (nullptr != streamExecutor) {
streamID = streamExecutor->GetStreamId();
int numGraphs = streamGraphs.size();
@ -651,8 +653,8 @@ void CLDNNInferRequest::setup_stream_graph() {
m_graph = streamGraphs[streamID];
}
Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_host_blob");
Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::create_host_blob");
const Precision& p = desc.getPrecision();
switch (p) {
@ -706,8 +708,8 @@ Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* m
}
}
void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_output_data");
void InferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_output_data");
auto& stream = m_graph->GetNetwork()->get_stream();
switch (dst->getTensorDesc().getPrecision()) {
case Precision::FP32: copyResultToOutputBlob<float>(src, dst, bi, stream); break;
@ -720,11 +722,11 @@ void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst,
}
}
void CLDNNInferRequest::copy_input_data(std::shared_ptr<cldnn::network> network,
void InferRequest::copy_input_data(std::shared_ptr<cldnn::network> network,
const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout,
const Blob &inputBlob, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_input_data");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_input_data");
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
@ -771,7 +773,7 @@ void CLDNNInferRequest::copy_input_data(std::shared_ptr<cldnn::network> network,
}
}
Blob::Ptr CLDNNInferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) {
Blob::Ptr InferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) {
uint8_t* bufferMem = nullptr;
auto clblobPtr = std::dynamic_pointer_cast<InferenceEngine::gpu::ClBlob>(blobPtr);
if (clblobPtr) {
@ -786,8 +788,8 @@ Blob::Ptr CLDNNInferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) {
return hostBlob;
}
void CLDNNInferRequest::allocate_inputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs");
void InferRequest::allocate_inputs() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs");
auto inputLayouts = m_graph->GetInputLayouts();
// allocate inputs
for (auto& ni : _networkInputs) {
@ -823,8 +825,8 @@ void CLDNNInferRequest::allocate_inputs() {
}
}
void CLDNNInferRequest::allocate_inputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs_dynamic");
void InferRequest::allocate_inputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic");
// allocate inputs
for (auto &input : m_graph->GetInputLayouts()) {
InputInfo::Ptr ni = _networkInputs.at(input.first);
@ -849,8 +851,8 @@ void CLDNNInferRequest::allocate_inputs_dynamic() {
}
}
void CLDNNInferRequest::allocate_outputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs");
void InferRequest::allocate_outputs() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs");
// allocate outputs
for (auto& no : _networkOutputs) {
std::string outputID = m_graph->MapOutputName(no.first);
@ -868,8 +870,8 @@ void CLDNNInferRequest::allocate_outputs() {
}
}
void CLDNNInferRequest::allocate_outputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs_dynamic");
void InferRequest::allocate_outputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic");
// allocate outputs
for (auto& no : _networkOutputs) {
std::string outputID = m_graph->MapOutputName(no.first);
@ -890,8 +892,8 @@ void CLDNNInferRequest::allocate_outputs_dynamic() {
}
}
void CLDNNInferRequest::InferImpl() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::InferImpl");
void InferRequest::InferImpl() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::InferImpl");
setup_stream_graph();
std::lock_guard<std::mutex> lk(m_graph->get_mutex());
preprocess();
@ -899,8 +901,8 @@ void CLDNNInferRequest::InferImpl() {
wait();
}
std::map<std::string, InferenceEngineProfileInfo> CLDNNInferRequest::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetPerformanceCounts");
std::map<std::string, InferenceEngineProfileInfo> InferRequest::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetPerformanceCounts");
if (!m_useProfiling) {
IE_THROW() << "Performance counters were not enabled";
} else {
@ -908,9 +910,9 @@ std::map<std::string, InferenceEngineProfileInfo> CLDNNInferRequest::GetPerforma
}
}
void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob,
void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob,
std::vector<cldnn::event::ptr>& dependencies) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_input");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_input");
auto inputLayoutItr = m_graph->GetInputLayouts().find(inputName);
if (inputLayoutItr == m_graph->GetInputLayouts().end()) {
IE_THROW() << "Input name mismatch.";
@ -943,7 +945,7 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
if (!is_dev_input) {
if (prec == Precision::I16 || prec == Precision::U16) {
// clDNN doesn't support I16 input precision,
// GPU plugin doesn't support I16 input precision,
// so have to convert input data to fp32 precision
cldnn::mem_lock<float> ptr{ inputMem, stream };
if (prec == Precision::I16) {
@ -968,8 +970,8 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
}
}
void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_output");
void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_output");
Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
cldnn::primitive_id internalName = outputsMap[outputName];
auto _nw_ptr = m_graph->GetNetwork();
@ -985,26 +987,28 @@ void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Bl
_nw_ptr->set_output_memory(internalName, outputMem);
}
InferenceEngine::Blob::Ptr CLDNNInferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
if (m_graph->GetEngine()->use_unified_shared_memory()) {
auto blobPtr = std::make_shared<CLDNNRemoteUSMbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout,
nullptr,
0,
0,
CLDNNRemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
auto blobPtr = std::make_shared<RemoteUSMbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout,
nullptr,
0,
0,
RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
getBlobImpl(blobPtr.get())->allocate();
return blobPtr;
} else {
auto blobPtr = std::make_shared<CLDNNRemoteCLbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout);
auto blobPtr = std::make_shared<RemoteCLbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout);
getBlobImpl(blobPtr.get())->allocate();
return blobPtr;
}
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
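
create_device_blob above branches on whether the engine reports unified shared memory support: a USM-host-backed remote blob when it does, a plain OpenCL buffer blob otherwise. A toy sketch of that selection logic only; DeviceBlob stands in for the RemoteUSMbuffer/RemoteCLbuffer types and carries none of their real behavior.

#include <memory>
#include <string>

struct DeviceBlob { std::string kind; };

std::shared_ptr<DeviceBlob> create_device_blob_sketch(bool engine_supports_usm) {
    if (engine_supports_usm) {
        // USM host memory is visible to both CPU and GPU, so host-side reads
        // and writes do not need an extra staging copy.
        return std::make_shared<DeviceBlob>(DeviceBlob{"usm_host"});
    }
    // Fallback: an ordinary OpenCL buffer allocated through the context.
    return std::make_shared<DeviceBlob>(DeviceBlob{"cl_buffer"});
}

int main() {
    return create_device_blob_sketch(true)->kind == "usm_host" ? 0 : 1;
}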

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/batch_to_space.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/batch_to_space.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::BatchToSpace>& op) {
p.ValidateInputs(op, {4});
@ -35,7 +37,7 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v
}
inputs.emplace_back(format, sizes, default_size);
}
auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
auto out_size = tensor_from_dims(op->get_output_shape(0));
auto batchToSpacePrim = cldnn::batch_to_space(layerName,
inputPrimitives[0], // input
@ -51,4 +53,6 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v1, BatchToSpace);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
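
The CldnnTensorFromIEDims → tensor_from_dims rename recurs through all of the op converters below. A plausible standalone model of what such a helper has to do, splitting IE dims ordered N, C, (D,) H, W into batch/feature/spatial with the spatial tail reversed; the padding rule and the exact signature are assumptions, only the reordering idea is taken from this diff.

#include <cstddef>
#include <vector>

struct SketchTensor {
    std::size_t batch = 1, feature = 1;
    std::vector<std::size_t> spatial;  // innermost-first: W, H(, D)
};

SketchTensor tensor_from_dims_sketch(std::vector<std::size_t> dims, std::size_t min_rank = 4) {
    while (dims.size() < min_rank) dims.push_back(1);    // pad missing dims with 1 (assumed)
    SketchTensor t;
    t.batch = dims[0];
    t.feature = dims[1];
    t.spatial.assign(dims.rbegin(), dims.rend() - 2);    // reverse the spatial tail
    return t;
}

int main() {
    auto t = tensor_from_dims_sketch({2, 8, 10, 20});    // N=2, C=8, H=10, W=20
    return (t.batch == 2 && t.feature == 8 && t.spatial.front() == 20) ? 0 : 1;
}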

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::Node>& op, const ngraph::AxisSet axis_mapping) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -69,7 +71,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
inputShape = tmp_shape;
}
auto targetShape = CldnnTensorFromIEDims(inputShape);
auto targetShape = tensor_from_dims(inputShape);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitive, targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
@ -80,7 +82,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
auto broadcastPrim = cldnn::broadcast(layerName,
inputPrimitive,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
{},
op->get_friendly_name());
@ -119,4 +121,6 @@ static void CreateBroadcastOp(Program& p, const std::shared_ptr<ngraph::op::v3::
REGISTER_FACTORY_IMPL(v1, Broadcast);
REGISTER_FACTORY_IMPL(v3, Broadcast);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,21 +2,23 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/concat.hpp"
#include "intel_gpu/primitives/concatenation.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) {
unsigned cldnn_axis = axis >= 0 ? axis : axis + static_cast<int32_t>(rank);
if (cldnn_axis >= rank)
IE_THROW() << "Concatenation axis exceeds number of dimensions";
// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
if (cldnn_axis >= 2) {
auto spatial_axis = cldnn_axis - 2;
@ -54,4 +56,6 @@ static void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Con
REGISTER_FACTORY_IMPL(v0, Concat);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
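
GetConcatAxis above, like several converters further down, remaps an IE axis to the GPU ordering: batch and feature keep their index, while axes inside the spatial block are mirrored because the GPU enumerates spatials innermost-first. A standalone model of that remapping; it returns a plain index rather than the real cldnn::concatenation enum.

#include <cstddef>
#include <cstdint>
#include <stdexcept>

unsigned remap_axis(int32_t axis, std::size_t rank) {
    unsigned a = axis >= 0 ? static_cast<unsigned>(axis)
                           : static_cast<unsigned>(axis + static_cast<int32_t>(rank));
    if (a >= rank)
        throw std::runtime_error("axis exceeds number of dimensions");
    if (a < 2)
        return a;                                   // batch / feature keep their index
    unsigned spatial = a - 2;                       // position inside the spatial block
    unsigned spatial_count = static_cast<unsigned>(rank) - 2;
    return 2 + (spatial_count - 1 - spatial);       // mirror within the spatial block
}

int main() {
    // For a 4D NCHW tensor, IE axis 2 (H) and axis 3 (W) swap places.
    return (remap_axis(2, 4) == 3 && remap_axis(3, 4) == 2 && remap_axis(-1, 4) == 2) ? 0 : 1;
}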

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
@ -20,7 +20,9 @@
#include "intel_gpu/primitives/data.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::tensor getConstTensor(const ngraph::Shape constDims) {
cldnn::tensor constTensor;
@ -216,4 +218,6 @@ void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::
REGISTER_FACTORY_IMPL(v0, Constant);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convert_like.hpp"
#include "intel_gpu/primitives/reorder.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvertLike>& op) {
p.ValidateInputs(op, {2});
@ -52,4 +54,6 @@ static void CreateConvertOp(Program& p, const std::shared_ptr<ngraph::op::v0::Co
REGISTER_FACTORY_IMPL(v0, Convert);
REGISTER_FACTORY_IMPL(v1, ConvertLike);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/binary_convolution.hpp"
@ -19,7 +19,9 @@
#include "intel_gpu/primitives/permute.hpp"
#include "intel_gpu/primitives/reorder.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
struct ConvoltuionParameters {
cldnn::tensor stride;
@ -82,7 +84,7 @@ static void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::o
params.stride,
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
tensor_from_dims(outDims),
DataTypeFromPrecision(outPrecision),
weights_have_group_dim,
op->get_friendly_name());
@ -111,7 +113,7 @@ static void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1
params.stride,
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
tensor_from_dims(outDims),
DataTypeFromPrecision(outPrecision),
weights_have_group_dim,
op->get_friendly_name());
@ -168,7 +170,7 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ng
params.groups,
params.stride,
params.padding,
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
tensor_from_dims(op->get_output_tensor(0).get_shape()),
weights_have_group_dim,
op->get_friendly_name());
@ -225,7 +227,7 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_p
params.groups,
params.stride,
params.padding,
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
tensor_from_dims(op->get_output_tensor(0).get_shape()),
weights_have_group_dim,
op->get_friendly_name());
@ -272,7 +274,7 @@ static void DeformableConvolutionImpl(Program& p,
params.stride,
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
tensor_from_dims(outDims),
kernel,
bilinearInterpolationPad,
op->get_friendly_name());
@ -283,7 +285,7 @@ static void DeformableConvolutionImpl(Program& p,
weights,
{},
params.groups,
CldnnTensorFromIEDims(outDims),
tensor_from_dims(outDims),
op->get_friendly_name());
p.AddPrimitive(defConvPrim);
p.AddPrimitiveToProfiler(defConvLayerNameConv, op);
@ -297,7 +299,7 @@ static void DeformableConvolutionImpl(Program& p,
params.stride,
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
tensor_from_dims(outDims),
bilinearInterpolationPad,
op->get_friendly_name());
@ -334,7 +336,7 @@ static void CreateBinaryConvolutionOp(Program& p, const std::shared_ptr<ngraph::
params.stride,
params.padding,
params.dilation,
CldnnTensorFromIEDims(outDims),
tensor_from_dims(outDims),
params.groups,
op->get_pad_value(),
calc_precision,
@ -352,4 +354,6 @@ REGISTER_FACTORY_IMPL(v1, DeformableConvolution);
REGISTER_FACTORY_IMPL(v8, DeformableConvolution);
REGISTER_FACTORY_IMPL(v1, BinaryConvolution);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/ctc_greedy_decoder.hpp"
#include "ngraph/op/ctc_greedy_decoder_seq_len.hpp"
@ -15,7 +15,9 @@
#include "transformations/utils/utils.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::Node>& op, bool ctc_merge_repeated) {
p.ValidateInputs(op, {2, 3});
@ -27,7 +29,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive supports only i32 data type for 'sequence_length' and 'blank_index' inputs
// GPU primitive supports only i32 data type for 'sequence_length' and 'blank_index' inputs
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -72,7 +74,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
cldnn::layout mutableLayout = cldnn::layout(
DataTypeFromPrecision(mutable_precision),
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
tensor_from_dims(op->get_output_shape(1)));
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
@ -95,10 +97,10 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
reorderedInputs,
blank_index,
ctc_merge_repeated,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
op->get_friendly_name());
// clDNN primitive supports only i32 as output data type
// GPU primitive supports only i32 as output data type
primitive.output_data_type = DataTypeFromPrecision(ngraph::element::i32);
if (num_output == 2) {
@ -131,4 +133,6 @@ static void CreateCTCGreedyDecoderSeqLenOp(Program& p, const std::shared_ptr<ngr
REGISTER_FACTORY_IMPL(v0, CTCGreedyDecoder);
REGISTER_FACTORY_IMPL(v6, CTCGreedyDecoderSeqLen);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
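
The i64-to-i32 workaround above (also used by the embedding-bag, gather, and NMS converters below) inserts a reorder in front of any i64 input and rewires the primitive to the reordered id. A standalone model of that rewiring; the naming scheme and the DType enum are placeholders, and the emitted strings stand in for real cldnn::reorder primitives.

#include <string>
#include <vector>

enum class DType { i32, i64, f32 };

struct PortedInput { std::string id; DType type; };

std::vector<std::string> insert_i32_reorders(const std::vector<PortedInput>& inputs,
                                             std::vector<std::string>& emitted_reorders) {
    std::vector<std::string> wired;
    for (const auto& in : inputs) {
        if (in.type == DType::i64) {
            std::string reorder_id = in.id + "_i32_reorder";   // hypothetical naming scheme
            emitted_reorders.push_back(reorder_id);            // would become a reorder node
            wired.push_back(reorder_id);                       // primitive reads the reorder
        } else {
            wired.push_back(in.id);                            // use the input as-is
        }
    }
    return wired;
}

int main() {
    std::vector<std::string> reorders;
    auto wired = insert_i32_reorders({{"data", DType::f32}, {"seq_len", DType::i64}}, reorders);
    return (reorders.size() == 1 && wired[1] == "seq_len_i32_reorder") ? 0 : 1;
}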

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/cum_sum.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/cum_sum.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static inline cldnn::cum_sum::cum_sum_axis GetCumSumAxis(int32_t axis, uint32_t rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static inline cldnn::cum_sum::cum_sum_axis GetCumSumAxis(int32_t axis, uint32_t
if (axis < 0 || axis >= rank)
IE_THROW() << "CumSum axis is not correspond to number of dimensions";
// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
uint32_t cldnn_axis = axis;
if (axis >= 2) {
@ -72,4 +74,6 @@ static void CreateCumSumOp(Program& p, const std::shared_ptr<ngraph::op::v0::Cum
REGISTER_FACTORY_IMPL(v0, CumSum);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,9 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "simple_math.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/simple_math.hpp"
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/node.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/custom_gpu_primitive.hpp"
#include "intel_gpu/primitives/reorder.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
template<typename T>
static inline std::string vecToString(std::vector<T> vec) {
@ -100,7 +102,7 @@ protected:
std::map<std::string, std::string> m_values;
};
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCustomLayerPtr customLayer) {
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CustomLayerPtr customLayer) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
@ -130,7 +132,7 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
cldnn::format outputFormat(cldnn::format::any);
for (const auto& param : customLayer->KernelParams()) {
switch (param.type) {
case CLDNNCustomLayer::ParamType::Input: {
case CustomLayer::ParamType::Input: {
kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input;
kernelParameters[param.paramIndex].index =
@ -159,7 +161,7 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
}
break;
}
case CLDNNCustomLayer::ParamType::Output: {
case CustomLayer::ParamType::Output: {
kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_output;
kernelParameters[param.paramIndex].index =
@ -255,4 +257,6 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
p.primitiveIDs[genericLayerName] = prevLayerName;
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
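
The custom-op converter above binds each parameter from the custom-layer config into a positional kernel argument slot, growing the argument vector on demand so slots can be filled out of order. A standalone model of that resize-and-assign idiom; ArgKind and KernelArg are placeholders for the cldnn custom_gpu_primitive argument types.

#include <cstddef>
#include <vector>

enum class ArgKind { unspecified, input, output };

struct KernelArg { ArgKind kind = ArgKind::unspecified; std::size_t port = 0; };

void bind_param(std::vector<KernelArg>& args, ArgKind kind, std::size_t slot, std::size_t port) {
    if (args.size() < slot + 1)
        args.resize(slot + 1);          // same resize-to-fit idiom as in the code above
    args[slot] = {kind, port};
}

int main() {
    std::vector<KernelArg> args;
    bind_param(args, ArgKind::output, 2, 0);   // slots may arrive out of order
    bind_param(args, ArgKind::input, 0, 0);
    bind_param(args, ArgKind::input, 1, 1);
    return (args.size() == 3 && args[2].kind == ArgKind::output) ? 0 : 1;
}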

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/depth_to_space.hpp"
#include "intel_gpu/primitives/depth_to_space.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::depth_to_space_mode GetDepthMode(ngraph::op::v0::DepthToSpace::DepthToSpaceMode mode) {
switch (mode) {
@ -42,4 +44,6 @@ static void CreateDepthToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v0, DepthToSpace);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/detection_output.hpp"
#include "intel_gpu/primitives/detection_output.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str) {
static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
@ -84,4 +86,6 @@ static void CreateDetectionOutputOp(Program& p, const std::shared_ptr<ngraph::op
REGISTER_FACTORY_IMPL(v0, DetectionOutput);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/add.hpp"
@ -30,7 +30,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::eltwise_mode mode) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -65,7 +67,7 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
// Extend input dimensions by prepending ones
inputShape.insert(inputShape.begin(), outRank - inputRank, 1ul);
auto targetShape = CldnnTensorFromIEDims(inputShape);
auto targetShape = tensor_from_dims(inputShape);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
@ -194,4 +196,6 @@ REGISTER_FACTORY_IMPL(v1, Power);
REGISTER_FACTORY_IMPL(v1, FloorMod);
REGISTER_FACTORY_IMPL(v1, Mod);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
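
The eltwise converter above prepares broadcasting by left-padding the lower-rank input's shape with ones ("Extend input dimensions by prepending ones") before inserting a reshape. A standalone model of just that shape padding step, using plain std containers instead of the GPU reshape primitive.

#include <cstddef>
#include <vector>

std::vector<std::size_t> align_rank(std::vector<std::size_t> shape, std::size_t out_rank) {
    if (shape.size() < out_rank)
        shape.insert(shape.begin(), out_rank - shape.size(), 1u);  // prepend ones
    return shape;
}

int main() {
    auto aligned = align_rank({8, 10, 20}, 4);   // C,H,W input against an N,C,H,W output
    return (aligned.size() == 4 && aligned[0] == 1 && aligned[1] == 8) ? 0 : 1;
}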

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/embedding_segments_sum.hpp"
#include "ngraph/op/embeddingbag_offsets_sum.hpp"
@ -14,7 +14,9 @@
#include "transformations/utils/utils.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingBagOffsetsSum>& op) {
p.ValidateInputs(op, {3, 4, 5});
@ -42,7 +44,7 @@ static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngr
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices inputs,
// GPU primitive supports only i32 data type for indices inputs,
// so we need additional reorders if they are provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -64,7 +66,7 @@ static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngr
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::offsets_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
defaultIndex,
op->get_friendly_name());
@ -83,7 +85,7 @@ static void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngra
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if ((portIndex == 1) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices input,
// GPU primitive supports only i32 data type for indices input,
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -105,7 +107,7 @@ static void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngra
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::packed_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
-1,
op->get_friendly_name());
@ -142,7 +144,7 @@ static void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngrap
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices inputs,
// GPU primitive supports only i32 data type for indices inputs,
// so we need additional reorders if they are provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -164,7 +166,7 @@ static void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngrap
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::segments_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
defaultIndex,
op->get_friendly_name());
@ -176,4 +178,6 @@ REGISTER_FACTORY_IMPL(v3, EmbeddingBagOffsetsSum);
REGISTER_FACTORY_IMPL(v3, EmbeddingBagPackedSum);
REGISTER_FACTORY_IMPL(v3, EmbeddingSegmentsSum);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/experimental_detectron_roi_feature.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/experimental_detectron_roi_feature_extractor.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const std::shared_ptr<ngraph::op::v6::ExperimentalDetectronROIFeatureExtractor>& op) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -19,7 +21,7 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s
cldnn::layout mutableLayout = cldnn::layout(
DataTypeFromPrecision(op->get_output_element_type(1)),
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
tensor_from_dims(op->get_output_shape(1)));
cldnn::memory::ptr shared_memory {p.GetEngine().allocate_memory(mutableLayout)};
@ -54,4 +56,6 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s
REGISTER_FACTORY_IMPL(v6, ExperimentalDetectronROIFeatureExtractor);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
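
The ROI-feature-extractor converter above (and the CTC decoder earlier) handles primitives with more than one output by allocating a buffer up front and exposing the extra output through a separate mutable-data node that shares that buffer. A toy model of that sharing arrangement; SharedBuffer and Node are placeholders for the cldnn memory and mutable_data types.

#include <cstddef>
#include <memory>
#include <string>
#include <vector>

struct SharedBuffer { std::size_t bytes; };

struct Node {
    std::string id;
    std::vector<std::string> deps;
    std::shared_ptr<SharedBuffer> buffer;   // set only for nodes backed by the shared memory
};

int main() {
    auto second_output = std::make_shared<SharedBuffer>(SharedBuffer{4 * 1000});
    // Primitive node: writes output 0 normally, output 1 into the shared buffer.
    Node roi_feature{"roi_feature_extractor", {"rois", "level0"}, second_output};
    // Companion node: re-exposes the same buffer as a separate graph output.
    Node second_output_node{"roi_feature_extractor.1", {"roi_feature_extractor"}, second_output};
    return second_output_node.buffer == roi_feature.buffer ? 0 : 1;
}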

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/extractimagepatches.hpp"
#include "intel_gpu/primitives/extract_image_patches.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static inline std::string PadToString(ngraph::op::PadType pad) {
switch (pad) {
@ -38,7 +40,7 @@ static void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph
strides,
rates,
auto_pad,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
op->get_friendly_name());
p.AddPrimitive(extractImagePatchesPrim);
@ -47,4 +49,6 @@ static void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph
REGISTER_FACTORY_IMPL(v3, ExtractImagePatches);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "intel_gpu/primitives/quantize.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v0::FakeQuantize>& op) {
p.ValidateInputs(op, {5});
@ -40,4 +42,6 @@ static void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v0, FakeQuantize);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/gather_tree.hpp"
#include "intel_gpu/primitives/gather_tree.hpp"
#include "intel_gpu/primitives/reorder.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::GatherTree>& op) {
p.ValidateInputs(op, {4});
@ -23,7 +25,7 @@ static void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1:
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive does not support i64 inputs,
// GPU primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -55,4 +57,6 @@ static void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1:
REGISTER_FACTORY_IMPL(v1, GatherTree);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/gather.hpp"
#include "intel_gpu/primitives/gather.hpp"
#include "intel_gpu/primitives/reorder.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::gather::gather_axis GetGatherAxis(int32_t axis, cldnn::format inputFormat) {
if (axis == 0) {
@ -70,7 +72,7 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive does not support i64 inputs,
// GPU primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -95,7 +97,7 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
reorderedInputs[1],
GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())),
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
batch_dim,
support_neg_ind,
op->get_friendly_name());
@ -125,4 +127,6 @@ static void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v8::Gat
REGISTER_FACTORY_IMPL(v8, Gather);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/gather_elements.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/gather_elements.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsigned rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsi
if (axis < 0 || axis >= rank)
IE_THROW() << "GatherElements axis is not correspond to number of dimensions";
// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
unsigned cldnn_axis = axis;
if (axis >= 2) {
@ -54,7 +56,7 @@ static void CreateGatherElementsOp(Program& p, const std::shared_ptr<ngraph::op:
inputPrimitives[0],
inputPrimitives[1],
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
GetGatherAxis(axis, rank),
op->get_friendly_name());
@ -64,4 +66,6 @@ static void CreateGatherElementsOp(Program& p, const std::shared_ptr<ngraph::op:
REGISTER_FACTORY_IMPL(v6, GatherElements);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/gather_nd.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/gather_nd.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::GatherND>& op) {
p.ValidateInputs(op, {2});
@ -62,4 +64,6 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v8::G
REGISTER_FACTORY_IMPL(v8, GatherND);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/grn.hpp"
#include "intel_gpu/primitives/grn.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>& op) {
p.ValidateInputs(op, {1});
@ -28,4 +30,6 @@ static void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>&
REGISTER_FACTORY_IMPL(v0, GRN);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "caseless.hpp"
#include "ngraph/op/interpolate.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/resample.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
switch (mode) {
@ -71,7 +73,7 @@ static cldnn::resample::resample_axis GetInterpolationAxis(int32_t axis, uint32_
if (axis < 0 || axis >= sz)
IE_THROW() << "Interpolate axis is not correspond to number of dimensions";
// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
uint32_t cldnn_axis = axis;
if (axis >= 2) {
@ -111,7 +113,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
auto attrs = op->get_attrs();
auto inputRank = op->get_input_shape(0).size();
auto outDims = op->get_output_shape(0).size();
auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
auto outTensor = tensor_from_dims(op->get_output_shape(0));
std::vector<int> pad_begin(attrs.pads_begin.begin(), attrs.pads_begin.end());
std::vector<int> pad_end(attrs.pads_end.begin(), attrs.pads_end.end());
@ -202,4 +204,6 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
REGISTER_FACTORY_IMPL(v4, Interpolate);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -3,9 +3,9 @@
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "cldnn_engine.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/plugin.hpp"
#include <cpp/ie_cnn_network.h>
@ -26,7 +26,9 @@
using Loop = ngraph::op::v5::Loop;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
@ -41,7 +43,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const int32_t output_idx) {
const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx));
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
const auto tensor = tensor_from_dims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
@ -224,4 +226,6 @@ static void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {
REGISTER_FACTORY_IMPL(v5, Loop);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/lrn.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::lrn_norm_region GetNormRegion(std::vector<int64_t> axis_value) {
if (axis_value.size() == 1 && axis_value[0] == 1) {
@ -47,4 +49,6 @@ static void CreateLRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::LRN>&
REGISTER_FACTORY_IMPL(v0, LRN);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/matmul.hpp"
#include "ngraph/op/constant.hpp"
@ -15,7 +15,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/permute.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
/*
* get_aligned_shapes function align two input shapes to have the same size and
@ -128,7 +130,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
auto reshapeInName = op->get_friendly_name() + suffix;
auto reshapeInPrim = cldnn::reshape(reshapeInName,
inputName,
CldnnTensorFromIEDims(reshapeSize),
tensor_from_dims(reshapeSize),
op->get_friendly_name());
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
@ -157,7 +159,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
auto lastLayerName = layerName;
if (reshape_fc) {
auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
auto outputShape = tensor_from_dims(op->get_output_shape(0));
auto outReshapeName = layerName + "_cldnn_out_reshape";
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());
@ -269,7 +271,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
// Reshape output if gemm specific shape does not match default one
if (outDimsN < 4) {
auto outputShape = CldnnTensorFromIEDims(outDims);
auto outputShape = tensor_from_dims(outDims);
auto outReshapeName = layerName + "_cldnn_out_reshape";
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());
@ -285,4 +287,6 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
REGISTER_FACTORY_IMPL(v0, MatMul);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
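For the reshape_fc path above, the matrix multiply is wrapped between two reshapes. As a purely illustrative example (shapes are hypothetical, not taken from a real model), an input of {2, 3, 128} multiplied by a {64, 128} weight matrix would be flattened to {6, 128} for the fully-connected primitive and then expanded back to the nGraph output shape:

// Hypothetical shapes, to illustrate the reshape-around-FC pattern from the hunk above:
//   input {2, 3, 128}  --reshape-->  {6, 128}  --fully_connected-->  {6, 64}
//   final reshape restores op->get_output_shape(0) == {2, 3, 64}
auto outputShape    = tensor_from_dims(op->get_output_shape(0));   // {2, 3, 64} in this example
auto outReshapeName = layerName + "_cldnn_out_reshape";
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());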

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/mvn.hpp"
#include "ngraph/op/constant.hpp"
@ -12,7 +12,9 @@
#include <algorithm>
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateCommonMVNOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
bool across_channels, bool normalize_variance, float eps, bool eps_inside_sqrt = true) {
@ -65,4 +67,6 @@ static void CreateMVNOp(Program& p, const std::shared_ptr<ngraph::op::v6::MVN>&
REGISTER_FACTORY_IMPL(v0, MVN);
REGISTER_FACTORY_IMPL(v6, MVN);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/non_max_suppression.hpp"
#include <ngraph/opsets/opset3.hpp>
@ -14,7 +14,9 @@
#include "intel_gpu/primitives/non_max_suppression.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static bool GetCenterPointBox(ngraph::op::v5::NonMaxSuppression::BoxEncodingType encoding) {
switch (encoding) {
@ -35,7 +37,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if ((portIndex == 2) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for 'max_output_boxes_per_class' input
// GPU primitive supports only i32 data type for 'max_output_boxes_per_class' input
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -54,7 +56,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
}
}
// clDNN primitive supports only i32 as output data type
// GPU primitive supports only i32 as output data type
auto out_type = op->get_output_element_type(0);
if (out_type == ngraph::element::i64) {
out_type = ngraph::element::i32;
@ -77,7 +79,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
cldnn::layout mutableLayoutSecond = cldnn::layout(
DataTypeFromPrecision(mutable_precision_second),
DefaultFormatForDims(op->get_output_shape(2).size()),
CldnnTensorFromIEDims(op->get_output_shape(2)));
tensor_from_dims(op->get_output_shape(2)));
GPU_DEBUG_IF(debug_config->verbose >= 2) {
GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
@ -175,4 +177,6 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
REGISTER_FACTORY_IMPL(internal, NonMaxSuppressionIEInternal);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
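The elided body of the loop above adds a reorder when the 'max_output_boxes_per_class' input arrives as i64, since the GPU primitive only consumes i32. A condensed sketch of that pattern is shown below; the exact cldnn::reorder constructor overload and the surrounding bookkeeping are assumptions based on the visible lines, not a copy of the hidden code.

// Sketch: convert an i64 input to i32 with a reorder before it reaches the GPU primitive.
if ((portIndex == 2) && (inputDataType == cldnn::data_types::i64)) {
    auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
    auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
    auto preprocessPrim = cldnn::reorder(reorderPrimName,
                                         inputPrimitives[portIndex],
                                         targetFormat,
                                         cldnn::data_types::i32);   // constructor overload assumed
    p.AddPrimitive(preprocessPrim);
    p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
    inputPrimitives[portIndex] = reorderPrimName;   // assumed: reordered primitive replaces the original input
}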

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/normalize_l2.hpp"
#include "ngraph/op/constant.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/normalize.hpp"
#include "intel_gpu/primitives/data.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::NormalizeL2>& op) {
p.ValidateInputs(op, {2});
@ -61,4 +63,6 @@ static void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0
REGISTER_FACTORY_IMPL(v0, NormalizeL2);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/one_hot.hpp"
#include "intel_gpu/primitives/one_hot.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& op) {
p.ValidateInputs(op, {4});
@ -49,7 +51,7 @@ static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::One
auto oneHotPrim = cldnn::one_hot(layerName,
inputPrimitives[0],
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
DataTypeFromPrecision(op->get_output_element_type(0)),
static_cast<uint16_t>(axis),
on_value,
@ -62,4 +64,6 @@ static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::One
REGISTER_FACTORY_IMPL(v1, OneHot);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/pad.hpp"
#include "intel_gpu/primitives/border.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::border_type GetBorderType(ngraph::op::PadMode mode) {
switch (mode) {
@ -73,4 +75,6 @@ static void CreatePadOp(Program& p, const std::shared_ptr<ngraph::op::v1::Pad>&
REGISTER_FACTORY_IMPL(v1, Pad);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/parameter.hpp"
@ -14,7 +14,9 @@
using namespace InferenceEngine;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Parameter>& op) {
auto networkInputs = p.GetNetworkInputs();
@ -26,11 +28,11 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
// first create and add the input layout
const auto inputDesc = inputInfo->getTensorDesc();
const auto inputDims = inputDesc.getDims();
Layout l = inputDesc.getLayout();
Precision ip = inputDesc.getPrecision();
InferenceEngine::Layout l = inputDesc.getLayout();
InferenceEngine::Precision ip = inputDesc.getPrecision();
cldnn::format inputFormat = cldnn::format::bfyx;
if (Layout::BLOCKED == l && 6 == inputDims.size()) {
if (InferenceEngine::Layout::BLOCKED == l && 6 == inputDims.size()) {
inputFormat = cldnn::format::bfwzyx;
} else {
inputFormat = FormatFromLayout(l);
@ -46,7 +48,7 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
cldnn::spatial(inputDims[5], inputDims[4], inputDims[3], inputDims[2]));
break;
case 5:
if (Layout::NCDHW == l) {
if (InferenceEngine::Layout::NCDHW == l) {
dataTensor = cldnn::tensor(cldnn::batch(batch),
cldnn::feature(inputDims[1]),
cldnn::spatial(inputDims[4], inputDims[3], inputDims[2]));
@ -55,10 +57,10 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
}
break;
case 4:
if (Layout::NCHW == l || Layout::CHW == l) {
if (InferenceEngine::Layout::NCHW == l || InferenceEngine::Layout::CHW == l) {
dataTensor = cldnn::tensor(batch,
TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2]));
} else if (Layout::NHWC == l) {
} else if (InferenceEngine::Layout::NHWC == l) {
dataTensor = cldnn::tensor(batch,
TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2]));
} else {
@ -66,14 +68,14 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
}
break;
case 3:
if (Layout::CHW == l) {
if (InferenceEngine::Layout::CHW == l) {
dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[2]));
} else {
IE_THROW() << "Unsupported layout (" << l << ") in 3D input " + inputInfo->name();
}
break;
case 2:
if (Layout::NCHW == l || NC == l) {
if (InferenceEngine::Layout::NCHW == l || NC == l) {
dataTensor = cldnn::tensor(batch, TensorValue(inputDims[1]), 1, 1);
} else {
IE_THROW() << "Unsupported layout (" << l << ") in 2D input " << inputInfo->name();
@ -177,8 +179,8 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
// for NV12, create two input layouts with reorder instead of one,
// and then would expect compound blob in inferRequest
if (Layout::NCHW != l &&
(Precision::I8 != ip || Precision::U8 != ip)) {
if (InferenceEngine::Layout::NCHW != l &&
(InferenceEngine::Precision::I8 != ip || InferenceEngine::Precision::U8 != ip)) {
IE_THROW() << "Unsupported layout (" << l << ") or precision "
<< ip.name() << ") for NV12 input " + inputInfo->name();
}
@ -280,4 +282,6 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
REGISTER_FACTORY_IMPL(v0, Parameter);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
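For reference, the layout handling above builds a cldnn::tensor whose spatial dimensions are listed innermost-first. A worked example for a typical 4D NCHW input of {1, 3, 224, 224} (values chosen purely for illustration):

// inputDims = {1, 3, 224, 224}, l == InferenceEngine::Layout::NCHW, rank == 4:
//   dataTensor = cldnn::tensor(batch /*1*/,
//                              TensorValue(inputDims[1]) /*3   = C*/,
//                              TensorValue(inputDims[3]) /*224 = W*/,
//                              TensorValue(inputDims[2]) /*224 = H*/);
// i.e. b=1, f=3, x=W, y=H, which lines up with the bfyx default chosen for inputFormat above.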

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "intel_gpu/primitives/pooling.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
struct PoolingParameters {
cldnn::tensor kernel;
@ -69,7 +71,7 @@ static void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Av
params.kernel,
params.stride,
params.pad_begin,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
DataTypeFromPrecision(op->get_output_element_type(0)),
op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
@ -89,7 +91,7 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
params.kernel,
params.stride,
params.pad_begin,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
DataTypeFromPrecision(op->get_output_element_type(0)),
op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
@ -100,4 +102,6 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
REGISTER_FACTORY_IMPL(v1, MaxPool);
REGISTER_FACTORY_IMPL(v1, AvgPool);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/prior_box.hpp"
#include "ngraph/op/prior_box_clustered.hpp"
#include "intel_gpu/primitives/prior_box.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBoxClustered>& op) {
p.ValidateInputs(op, {2});
@ -114,4 +116,6 @@ static void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::P
REGISTER_FACTORY_IMPL(v0, PriorBoxClustered);
REGISTER_FACTORY_IMPL(v0, PriorBox);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/proposal.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
p.ValidateInputs(op, {3});
@ -61,7 +63,7 @@ static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::P
cldnn::layout mutableLayout = cldnn::layout(DataTypeFromPrecision(mutable_precision),
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
tensor_from_dims(op->get_output_shape(1)));
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
@ -153,4 +155,6 @@ static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::P
REGISTER_FACTORY_IMPL(v0, Proposal);
REGISTER_FACTORY_IMPL(v4, Proposal);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/reduce_sum.hpp"
#include "ngraph/op/reduce_prod.hpp"
@ -20,7 +20,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
p.ValidateInputs(op, {2});
@ -173,4 +175,6 @@ REGISTER_FACTORY_IMPL(v1, ReduceSum);
REGISTER_FACTORY_IMPL(v4, ReduceL1);
REGISTER_FACTORY_IMPL(v4, ReduceL2);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/region_yolo.hpp"
#include "intel_gpu/primitives/region_yolo.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::RegionYolo>& op) {
p.ValidateInputs(op, {1});
@ -37,4 +39,6 @@ static void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0:
REGISTER_FACTORY_IMPL(v0, RegionYolo);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/reorg_yolo.hpp"
#include "intel_gpu/primitives/reorg_yolo.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYolo>& op) {
p.ValidateInputs(op, {1});
@ -29,4 +31,6 @@ static void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::
REGISTER_FACTORY_IMPL(v0, ReorgYolo);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/squeeze.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/reshape.hpp"
#include "intel_gpu/primitives/reorder.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
p.ValidateInputs(op, {1, 2});
@ -21,7 +23,7 @@ static void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node
auto inDims = op->get_input_shape(0);
auto outDims = op->get_output_shape(0);
auto outTensor = CldnnTensorFromIEDims(outDims);
auto outTensor = tensor_from_dims(outDims);
// if we convert from or to 5D/6D, additional reorder also required to change format
cldnn::primitive_id reshapeInputId = inputPrimitives[0];
@ -74,4 +76,6 @@ REGISTER_FACTORY_IMPL(v1, Reshape);
REGISTER_FACTORY_IMPL(v0, Squeeze);
REGISTER_FACTORY_IMPL(v0, Unsqueeze);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/result.hpp"
@ -11,7 +11,9 @@
using namespace InferenceEngine;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& op) {
OutputsDataMap networkOutputs = p.GetNetworkOutputs();
@ -73,4 +75,6 @@ static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Res
REGISTER_FACTORY_IMPL(v0, Result);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/reverse_sequence.hpp"
#include "intel_gpu/primitives/reverse_sequence.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReverseSequence>& op) {
p.ValidateInputs(op, {2});
@ -31,4 +33,6 @@ static void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op
REGISTER_FACTORY_IMPL(v0, ReverseSequence);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/lstm_cell.hpp"
#include "ngraph/op/lstm_sequence.hpp"
@ -15,7 +15,9 @@
#include "intel_gpu/primitives/crop.hpp"
#include "intel_gpu/primitives/concatenation.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::activation_func GetActivationFunc(std::string name) {
static const std::map<std::string, cldnn::activation_func> name_mapping = {
{"sigmoid", cldnn::activation_func::logistic},
@ -276,7 +278,7 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v
std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize), op->get_friendly_name());
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, tensor_from_dims(WRreshapeSize), op->get_friendly_name());
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);
@ -353,4 +355,6 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v4, LSTMCell);
REGISTER_FACTORY_IMPL(v5, LSTMSequence);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -1,13 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/roi_align.hpp"
#include "intel_gpu/primitives/roi_align.hpp"
#include <memory>
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
namespace {
@ -39,4 +41,6 @@ void CreateROIAlignOp(Program& p, const std::shared_ptr<ngraph::op::v3::ROIAlign
REGISTER_FACTORY_IMPL(v3, ROIAlign);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/roi_pooling.hpp"
#include "ngraph/op/psroi_pooling.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/roi_pooling.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::pooling_mode GetPoolingMode(std::string method) {
if (method == "bilinear")
@ -126,4 +128,6 @@ REGISTER_FACTORY_IMPL(v1, DeformablePSROIPooling);
REGISTER_FACTORY_IMPL(v0, PSROIPooling);
REGISTER_FACTORY_IMPL(v0, ROIPooling);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/scatter_elements_update.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/scatter_elements_update.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static inline cldnn::scatter_elements_update::scatter_elements_update_axis GetScatterElementsUpdateAxis(int axis, unsigned rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static inline cldnn::scatter_elements_update::scatter_elements_update_axis GetSc
if (axis < 0 || axis >= rank)
IE_THROW() << "ScatterElementsUpdate axis is not correspond to number of dimensions";
// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
unsigned cldnn_axis = axis;
if (axis >= 2) {
@ -66,4 +68,6 @@ static void CreateScatterElementsUpdateOp(Program& p, const std::shared_ptr<ngra
REGISTER_FACTORY_IMPL(v3, ScatterElementsUpdate);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
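The axis translation above ("reverse spatial dimensions after batch and feature") can be made concrete with a small standalone sketch. The arithmetic below is one plausible reading of that comment and is an assumption for illustration, not a copy of GetScatterElementsUpdateAxis:

// Assumed mapping: batch (0) and feature (1) keep their positions, spatial axes 2..rank-1 are
// reversed, because cldnn tensors store spatial dimensions innermost-first.
unsigned remap_axis_sketch(unsigned axis, unsigned rank) {
    if (axis < 2)
        return axis;                                 // batch / feature unchanged
    unsigned spatial_count = rank - 2;               // number of spatial dims
    unsigned spatial_index = axis - 2;               // position among spatial dims
    return 2 + (spatial_count - 1 - spatial_index);  // rank 4: axis 2 -> 3, axis 3 -> 2
}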

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/scatter_nd_update.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/scatter_nd_update.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::ScatterNDUpdate>& op) {
p.ValidateInputs(op, {3});
@ -31,4 +33,6 @@ static void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr<ngraph::op
REGISTER_FACTORY_IMPL(v3, ScatterNDUpdate);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/scatter_update.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/scatter_update.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static inline cldnn::scatter_update::scatter_update_axis GetScatterUpdateAxis(int axis, unsigned rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static inline cldnn::scatter_update::scatter_update_axis GetScatterUpdateAxis(in
if (axis < 0 || axis >= rank)
IE_THROW() << "ScatterUpdate axis is not correspond to number of dimensions";
// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
unsigned cldnn_axis = axis;
if (axis >= 2) {
@ -66,4 +68,6 @@ static void CreateScatterUpdateOp(Program& p, const std::shared_ptr<ngraph::op::
REGISTER_FACTORY_IMPL(v3, ScatterUpdate);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/select.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& op) {
p.ValidateInputs(op, {3});
@ -61,7 +63,7 @@ static void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Sel
// Extend input dimensions to the same size as output dimensions by prepending ones
inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul);
auto targetShape = CldnnTensorFromIEDims(inputDims);
auto targetShape = tensor_from_dims(inputDims);
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
@ -89,4 +91,6 @@ static void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Sel
REGISTER_FACTORY_IMPL(v1, Select);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
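The broadcast handling above extends a lower-rank input by prepending ones before reshaping it to the output rank. A tiny worked example with hypothetical shapes:

// outDimsN == 4 and the input has shape {3, 4} (inputDimsN == 2):
std::vector<size_t> inputDims = {3, 4};
inputDims.insert(inputDims.begin(), 4 - 2, 1ul);   // -> {1, 1, 3, 4}
auto targetShape = tensor_from_dims(inputDims);    // b=1, f=1, y=3, x=4 for the reshape primitive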

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/shuffle_channels.hpp"
#include "intel_gpu/primitives/shuffle_channels.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op::v0::ShuffleChannels>& op) {
p.ValidateInputs(op, {1, 2});
@ -45,4 +47,6 @@ static void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op
REGISTER_FACTORY_IMPL(v0, ShuffleChannels);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/log_softmax.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/softmax.hpp"
#include "intel_gpu/primitives/activation.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::softmax::dimension_t GetSoftmaxAxis(int64_t axis, size_t rank) {
switch (axis) {
@ -73,4 +75,6 @@ static void CreateLogSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v5:
REGISTER_FACTORY_IMPL(v1, Softmax);
REGISTER_FACTORY_IMPL(v5, LogSoftmax);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/space_to_batch.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/space_to_batch.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v1::SpaceToBatch>& op) {
p.ValidateInputs(op, {4});
@ -35,7 +37,7 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v
}
inputs.emplace_back(format, sizes, default_size);
}
auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
auto out_size = tensor_from_dims(op->get_output_shape(0));
auto batchToSpacePrim = cldnn::space_to_batch(layerName,
inputPrimitives[0], // input
@ -51,4 +53,6 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v1, SpaceToBatch);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/space_to_depth.hpp"
#include "intel_gpu/primitives/space_to_depth.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::space_to_depth::depth_mode GetDepthMode(ngraph::op::v0::SpaceToDepth::SpaceToDepthMode mode) {
switch (mode) {
@ -36,4 +38,6 @@ static void CreateSpaceToDepthOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v0, SpaceToDepth);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/split.hpp"
#include "ngraph/op/variadic_split.hpp"
#include "intel_gpu/primitives/crop.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -37,8 +39,8 @@ static void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>&
}
NGRAPH_SUPPRESS_DEPRECATED_END
auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);
auto outTensor = tensor_from_dims(outLayerDims, 1);
auto offsetTensor = tensor_from_dims(startOffset, 0);
auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor, op->get_friendly_name());
p.primitiveIDs[outLayerName] = outLayerName;
@ -71,4 +73,6 @@ static void CreateVariadicSplitOp(Program& p, const std::shared_ptr<ngraph::op::
REGISTER_FACTORY_IMPL(v1, Split);
REGISTER_FACTORY_IMPL(v1, VariadicSplit);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
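Each output of Split/VariadicSplit above becomes a cldnn::crop defined by a size tensor and a start-offset tensor; the second argument of tensor_from_dims presumably pads missing dimensions (1 for sizes, 0 for offsets). As a hypothetical example, splitting a {1, 6, 4, 4} input into three equal parts along axis 1 gives:

// out 0: outLayerDims = {1, 2, 4, 4}, startOffset = {0, 0, 0, 0}
// out 1: outLayerDims = {1, 2, 4, 4}, startOffset = {0, 2, 0, 0}
// out 2: outLayerDims = {1, 2, 4, 4}, startOffset = {0, 4, 0, 0}
auto outTensor    = tensor_from_dims(outLayerDims, 1);
auto offsetTensor = tensor_from_dims(startOffset, 0);
auto cropPrim     = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor, op->get_friendly_name());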

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/strided_slice.hpp"
#include "ngraph/op/constant.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/reshape.hpp"
#include "intel_gpu/primitives/crop.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::StridedSlice>& op) {
p.ValidateInputs(op, {4});
@ -187,7 +189,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
auto inPrimitive = inputPrimitives[0];
// Reshape in case of new axis
if (!new_axis_mask.empty()) {
auto targetShape = CldnnTensorFromIEDims(reshape_pattern);
auto targetShape = tensor_from_dims(reshape_pattern);
auto reshapeInName = op->get_friendly_name() + "/Reshape_before";
auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
@ -211,8 +213,8 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
}
cldnn::tensor refSize = CldnnTensorFromIEDims(crop_shape);
cldnn::tensor offSize = CldnnTensorFromIEDims(offset, 0);
cldnn::tensor refSize = tensor_from_dims(crop_shape);
cldnn::tensor offSize = tensor_from_dims(offset, 0);
auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize, op->get_friendly_name());
@ -221,7 +223,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
// Reshape in case of deleting of axis
if (!shrink_axis_mask.empty()) {
auto targetShape = CldnnTensorFromIEDims(output_shape);
auto targetShape = tensor_from_dims(output_shape);
auto reshapeOutName = op->get_friendly_name() + "/Crop";
auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
@ -249,7 +251,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
e = 1 - e;
}
auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
auto out_size = tensor_from_dims(op->get_output_shape(0));
auto stridedSlicePrim = cldnn::strided_slice(layerName,
inputPrimitives[0],
@ -269,4 +271,6 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v1, StridedSlice);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,10 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
//
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "cldnn_engine.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/plugin.hpp"
#include <cpp/ie_cnn_network.h>
@ -24,7 +23,9 @@
using TensorIterator = ngraph::op::v0::TensorIterator;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
@ -39,7 +40,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const int32_t output_idx) {
const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx));
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
const auto tensor = tensor_from_dims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
@ -199,4 +200,6 @@ static void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorItera
REGISTER_FACTORY_IMPL(v0, TensorIterator);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/tile.hpp"
#include "intel_gpu/primitives/tile.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>& op) {
p.ValidateInputs(op, {2});
@ -18,7 +20,7 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>
auto tilePrim = cldnn::tile(layerName,
inputPrimitives[0],
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
op->get_friendly_name());
p.AddPrimitive(tilePrim);
@ -27,4 +29,6 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>
REGISTER_FACTORY_IMPL(v0, Tile);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/topk.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::arg_max_min::axis_name GetAxis(int32_t axis, size_t in_rank) {
if (in_rank == 5) {
@ -70,7 +72,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
cldnn::layout mutableLayout = cldnn::layout(DataTypeFromPrecision(mutable_precision),
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
tensor_from_dims(op->get_output_shape(1)));
GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
@ -130,4 +132,6 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>
REGISTER_FACTORY_IMPL(v1, TopK);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp"
#include "intel_gpu/primitives/permute.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpose>& op) {
p.ValidateInputs(op, {1, 2});
@ -46,4 +48,6 @@ static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::
REGISTER_FACTORY_IMPL(v1, Transpose);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "intel_gpu/plugin/program.hpp"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/tanh.hpp"
@ -43,7 +43,9 @@
#include "intel_gpu/primitives/activation.hpp"
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
cldnn::activation_func func, cldnn::activation_additional_params params) {
@ -313,4 +315,6 @@ REGISTER_FACTORY_IMPL(v0, Sign);
REGISTER_FACTORY_IMPL(v5, HSigmoid);
REGISTER_FACTORY_IMPL(v5, Round);
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@ -16,11 +16,11 @@
#include <ie_ngraph_utils.hpp>
#include <ie_algorithm.hpp>
#include "cldnn_engine.h"
#include "cldnn_executable_network.h"
#include "cldnn_transformations_pipeline.h"
#include "cldnn_custom_layer.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/plugin.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/transformations_pipeline.hpp"
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "gpu/gpu_config.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
@ -41,7 +41,9 @@ using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
#define FACTORY_DECLARATION(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version();
@ -50,20 +52,20 @@ namespace CLDNNPlugin {
__register ## _ ## op_name ## _ ## op_version();
#define REGISTER_FACTORY(op_version, op_name) FACTORY_DECLARATION(op_version, op_name)
#include "cldnn_primitives_list.hpp"
#include "intel_gpu/plugin/primitives_list.hpp"
#undef REGISTER_FACTORY
void clDNNEngine::RegisterPrimitives() {
void Plugin::RegisterPrimitives() {
#define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name)
#include "cldnn_primitives_list.hpp"
#include "intel_gpu/plugin/primitives_list.hpp"
#undef REGISTER_FACTORY
}
struct clDNNEngine::impl {
CLDNNPlugin::Configs m_configs;
struct Plugin::impl {
Configs m_configs;
};
std::string clDNNEngine::GetDeviceIDFromConfig(const std::map<std::string, std::string>& config) const {
std::string Plugin::GetDeviceIDFromConfig(const std::map<std::string, std::string>& config) const {
std::string device_id;
if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) {
device_id = config.at(PluginConfigParams::KEY_DEVICE_ID);
@ -71,7 +73,7 @@ std::string clDNNEngine::GetDeviceIDFromConfig(const std::map<std::string, std::
return device_id;
}
cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
cldnn::device_info Plugin::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
auto device_info = device_map.begin()->second->get_info();
std::string device_id = GetDeviceIDFromConfig(config);
if (!device_id.empty()) {
@ -84,9 +86,9 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::s
return device_info;
}
InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const CLDNNPlugin::Config& config) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::CloneAndTransformNetwork");
InferenceEngine::CNNNetwork Plugin::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const Config& config) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::CloneAndTransformNetwork");
CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
if (clonedNetwork.getFunction()) {
@ -103,11 +105,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
return clonedNetwork;
}
clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
Plugin::Plugin() : m_defaultContext(nullptr) {
_pluginName = "GPU";
_impl = std::make_shared<impl>();
RegisterPrimitives();
// try loading clDNN engine and get info from it
// try loading gpu engine and get info from it
{
// Set OCL runtime which should be always available
cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);
@ -124,12 +126,12 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
CHAR mpath[MAX_PATH + 1];
HMODULE nModule;
GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(LPCSTR)CLDNNCustomLayer::LoadFromFile,
(LPCSTR)CustomLayer::LoadFromFile,
&nModule);
GetModuleFileName(nModule, mpath, sizeof(mpath));
#elif __linux__
Dl_info dl_info;
dladdr(reinterpret_cast<void *>(CLDNNCustomLayer::LoadFromFile), &dl_info);
dladdr(reinterpret_cast<void *>(CustomLayer::LoadFromFile), &dl_info);
const char* mpath = dl_info.dli_fname;
#endif
std::string configFile(mpath);
@ -142,7 +144,7 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
}
config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml";
for (auto& config : _impl->m_configs) {
CLDNNCustomLayer::LoadFromFile(config_path, config.second.customLayers, true);
CustomLayer::LoadFromFile(config_path, config.second.customLayers, true);
}
}
@ -164,8 +166,8 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
}
};
void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateConfig");
void Plugin::UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateConfig");
auto device_info = GetDeviceInfo(params);
conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
conf.UpdateFromMap(params);
@ -174,8 +176,8 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine:
}
}
void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateStatistics");
void Plugin::UpdateStatistics(const RemoteCLContext::Ptr& context) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateStatistics");
{
std::lock_guard<std::mutex> lock(engine_mutex);
@ -193,9 +195,9 @@ void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) con
}
}
std::map<std::string, std::string> clDNNEngine::ConvertPerfHintsToConfig(
std::map<std::string, std::string> Plugin::ConvertPerfHintsToConfig(
const std::map<std::string, std::string>& network_config,
const CLDNNPlugin::Config& plugin_config) const {
const Config& plugin_config) const {
// deduces the actual settings from the performance hints and returns fully-defined config
auto config = network_config;
const auto &mode = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
@ -223,21 +225,21 @@ std::map<std::string, std::string> clDNNEngine::ConvertPerfHintsToConfig(
return config;
}
IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &orig_config) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl");
// verification of supported input
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
check_inputs(_networkInputs);
CLDNNPlugin::Configs confs = _impl->m_configs;
Configs confs = _impl->m_configs;
std::string device_id = GetDeviceIDFromConfig(orig_config);
CLDNNPlugin::Config conf = confs.GetConfig(device_id);
Config conf = confs.GetConfig(device_id);
auto config = ConvertPerfHintsToConfig(orig_config, conf);
UpdateConfig(conf, network, config);
CLDNNRemoteCLContext::Ptr context;
RemoteCLContext::Ptr context;
auto canReuseDefaultContext = [&]() -> bool {
if (m_defaultContext == nullptr)
@ -263,10 +265,10 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
};
{
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateContext");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext");
std::lock_guard<std::mutex> lock(engine_mutex);
if (!canReuseDefaultContext()) {
m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), ParamMap(), conf));
m_defaultContext.reset(new RemoteCLContext(shared_from_this(), ParamMap(), conf));
}
}
@ -274,16 +276,16 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
auto transformedNetwork = CloneAndTransformNetwork(network, conf);
{
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork");
CLDNNExecNetwork::Ptr exeNetwork = std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork");
CompiledModel::Ptr exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, conf);
UpdateStatistics(context);
return exeNetwork;
}
}
IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
const RemoteContext::Ptr &context,
const std::map<std::string, std::string> &orig_config) {
IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
const InferenceEngine::RemoteContext::Ptr &context,
const std::map<std::string, std::string> &orig_config) {
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
check_inputs(_networkInputs);
@ -292,39 +294,39 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
IE_THROW() << "Invalid context";
}
CLDNNPlugin::Config conf = getContextImpl(casted)->GetConfig();
Config conf = getContextImpl(casted)->GetConfig();
auto config = ConvertPerfHintsToConfig(orig_config, conf);
UpdateConfig(conf, network, config);
auto transformedNetwork = CloneAndTransformNetwork(network, conf);
return std::make_shared<CLDNNExecNetwork>(transformedNetwork, casted, conf);
return std::make_shared<CompiledModel>(transformedNetwork, casted, conf);
}
RemoteContext::Ptr clDNNEngine::CreateContext(const ParamMap& params) {
InferenceEngine::RemoteContext::Ptr Plugin::CreateContext(const ParamMap& params) {
// parameter map is non-empty
std::string contextTypeStr = _StrFromParams(params, GPU_PARAM_KEY(CONTEXT_TYPE));
if (GPU_PARAM_VALUE(OCL) == contextTypeStr) {
return std::make_shared<CLDNNRemoteCLContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
return std::make_shared<RemoteCLContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
} else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) {
#ifdef _WIN32
return std::make_shared<CLDNNRemoteD3DContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
return std::make_shared<RemoteD3DContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
#else
return std::make_shared<CLDNNRemoteVAContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
return std::make_shared<RemoteVAContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
#endif
} else {
IE_THROW() << "Invalid remote context type" << contextTypeStr;
}
}
RemoteContext::Ptr clDNNEngine::GetDefaultContext(const ParamMap& params) {
InferenceEngine::RemoteContext::Ptr Plugin::GetDefaultContext(const ParamMap& params) {
if (nullptr == m_defaultContext) {
m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()));
m_defaultContext.reset(new RemoteCLContext(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()));
}
return m_defaultContext;
}
void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
void Plugin::SetConfig(const std::map<std::string, std::string> &config) {
streamsSet = (config.find(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) != config.end());
throttlingSet = config.find(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) != config.end() ||
config.find(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) != config.end();
@ -345,18 +347,18 @@ void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
}
}
QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::QueryNetwork");
QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::QueryNetwork");
QueryNetworkResult res;
CLDNNPlugin::Configs confs = _impl->m_configs;
Configs confs = _impl->m_configs;
std::string device_id = GetDeviceIDFromConfig(config);
CLDNNPlugin::Config conf = confs.GetConfig(device_id);
Config conf = confs.GetConfig(device_id);
UpdateConfig(conf, network, config);
if (m_defaultContext == nullptr) {
m_defaultContext.reset(new CLDNNRemoteCLContext(
m_defaultContext.reset(new RemoteCLContext(
std::const_pointer_cast<InferenceEngine::IInferencePlugin>(shared_from_this()),
ParamMap(), conf));
}
@ -568,8 +570,8 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
return res;
}
Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetConfig");
Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetConfig");
Parameter result;
std::string device_id;
@ -655,8 +657,8 @@ static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs;
}
Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetMetric");
std::string device_id = GetConfig(CONFIG_KEY(DEVICE_ID), options);
auto iter = device_map.find(device_id);
@ -813,7 +815,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
InferenceEngine::CNNNetwork network(model);
size_t base_batch_size = 16; // empirically decided for DG1
auto engine_params = clDNNEngine::GetEngineParams(config, iter->second, nullptr);
auto engine_params = Plugin::GetParams(config, iter->second, nullptr);
auto engine = cldnn::engine::create(engine_params.engine_type, engine_params.runtime_type, iter->second,
cldnn::engine_configuration(false, engine_params.queue_type, std::string(),
config.queuePriority, config.queueThrottle, config.memory_pool_on,
@ -835,7 +837,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
for (auto& info : inputs_info) {
if (!info.second)
continue;
Layout layout = info.second->getLayout();
InferenceEngine::Layout layout = info.second->getLayout();
auto data = info.second->getInputData();
if (!data)
continue;
@ -885,7 +887,9 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
IE_THROW() << "Unsupported metric key " << name;
}
}
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
static const Version version = { {2, 1}, CI_BUILD_NUMBER, "clDNNPlugin" };
IE_DEFINE_PLUGIN_CREATE_FUNCTION(CLDNNPlugin::clDNNEngine, version)
static const Version version = { {2, 1}, CI_BUILD_NUMBER, "Intel GPU plugin" };
IE_DEFINE_PLUGIN_CREATE_FUNCTION(ov::runtime::intel_gpu::Plugin, version)
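The REGISTER_FACTORY machinery in this file is defined twice on purpose: once so that including intel_gpu/plugin/primitives_list.hpp declares the per-op registration functions, and once inside Plugin::RegisterPrimitives so that the same include calls them. A condensed illustration for one hypothetical list entry (the entry itself is assumed, not quoted from primitives_list.hpp):

// Hypothetical entry in primitives_list.hpp:            REGISTER_FACTORY(v0, MatMul);
// Declaration pass (FACTORY_DECLARATION) expands to:    void __register_MatMul_v0();
// Call pass (FACTORY_CALL, inside RegisterPrimitives):  __register_MatMul_v0();
// The __register_*_* definitions are presumably emitted by REGISTER_FACTORY_IMPL(v0, MatMul)
// at the bottom of the corresponding converter file, as seen in the MatMul hunk earlier in this diff.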

View File

@ -2,16 +2,18 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "intel_gpu/plugin/program.hpp"
#include "ngraph/ops.hpp"
#include "ngraph_ops/nms_ie_internal.hpp"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
const cldnn::primitive_id Program::m_preProcessTag("_cldnn_input_preprocess");
const cldnn::primitive_id Program::m_meanValuesTag("_cldnn_mean_values");
@ -177,7 +179,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs,
bool createTopologyOnly, bool partialBuild) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::BuildProgram");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::BuildProgram");
cldnn::build_options options;
if (!m_config.graph_dumps_dir.empty()) {
@ -196,7 +198,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
if (createTopologyOnly) {
return {};
} else {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateProgram");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateProgram");
auto program = cldnn::program::build_program(*m_engine, *m_topology, options);
CleanupBuild();
@ -205,7 +207,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
}
bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::IsOpSupported");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::IsOpSupported");
cldnn::topology topology;
try {
// Query mode disables checks that input primitives are created,
@ -232,7 +234,7 @@ bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st
}
void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateSingleLayerPrimitive");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateSingleLayerPrimitive");
InitProfileInfo(op->get_friendly_name(), op->get_type_name());
GPU_DEBUG_GET_INSTANCE(debug_config);
@ -355,4 +357,6 @@ bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
return is_const_node(node);
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov


@ -3,19 +3,21 @@
//
#include <memory>
#include "cldnn_remote_context.h"
#include "cldnn_itt.h"
#include "cldnn_engine.h"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/plugin/plugin.hpp"
#include "intel_gpu/runtime/device_query.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
namespace ov {
namespace runtime {
namespace intel_gpu {
RemoteAllocator RemoteBlobImpl::m_allocator;
CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
RemoteBlobImpl::RemoteBlobImpl(ClContext::Ptr context,
cldnn::stream& stream,
const cldnn::layout& layout,
cldnn::shared_handle mem,
@ -26,7 +28,7 @@ CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
_handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
}
ParamMap CLDNNRemoteBlobImpl::getParams() const {
ParamMap RemoteBlobImpl::getParams() const {
assert(m_memObject != nullptr);
auto params = m_memObject->get_internal_params();
@ -86,21 +88,21 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const {
}
}
bool CLDNNRemoteBlobImpl::deallocate() noexcept {
bool RemoteBlobImpl::deallocate() noexcept {
m_memObject.reset();
return m_memObject == nullptr;
}
bool CLDNNRemoteBlobImpl::is_allocated() const noexcept {
bool RemoteBlobImpl::is_allocated() const noexcept {
return m_memObject != nullptr;
}
bool CLDNNRemoteBlobImpl::is_locked() const noexcept {
bool RemoteBlobImpl::is_locked() const noexcept {
return lockedHolder != nullptr;
}
void CLDNNRemoteBlobImpl::allocate() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::Allocate");
void RemoteBlobImpl::allocate() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate");
assert(m_memObject == nullptr);
auto _impl = getContextImpl(m_context.lock());
@ -153,22 +155,22 @@ void CLDNNRemoteBlobImpl::allocate() {
_impl->release_lock();
}
const std::shared_ptr<IAllocator>& CLDNNRemoteBlobImpl::getAllocator() const noexcept {
const std::shared_ptr<IAllocator>& RemoteBlobImpl::getAllocator() const noexcept {
if (!_allocator) {
_allocator = std::shared_ptr<IAllocator>(&m_allocator, [] (IAllocator*) {});
}
return _allocator;
};
std::string CLDNNRemoteBlobImpl::getDeviceName() const noexcept {
std::string RemoteBlobImpl::getDeviceName() const noexcept {
return getContextImpl(m_context.lock())->getDeviceName();
};
std::shared_ptr<RemoteContext> CLDNNRemoteBlobImpl::getContext() const noexcept {
std::shared_ptr<InferenceEngine::RemoteContext> RemoteBlobImpl::getContext() const noexcept {
return m_context.lock();
}
void CLDNNRemoteBlobImpl::lock() const {
void RemoteBlobImpl::lock() const {
if (!is_allocated()) {
IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated";
}
@ -178,36 +180,36 @@ void CLDNNRemoteBlobImpl::lock() const {
m_allocator.regLockedBlob(_handle, this);
}
void CLDNNRemoteBlobImpl::unlock() const {
void RemoteBlobImpl::unlock() const {
lockedHolder.reset();
}
LockedMemory<void> CLDNNRemoteBlobImpl::buffer() noexcept {
LockedMemory<void> RemoteBlobImpl::buffer() noexcept {
lock();
return LockedMemory<void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
}
LockedMemory<const void> CLDNNRemoteBlobImpl::cbuffer() const noexcept {
LockedMemory<const void> RemoteBlobImpl::cbuffer() const noexcept {
lock();
return LockedMemory<const void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
}
LockedMemory<void> CLDNNRemoteBlobImpl::rwmap()noexcept {
LockedMemory<void> RemoteBlobImpl::rwmap()noexcept {
lock();
return LockedMemory<void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
}
LockedMemory<const void> CLDNNRemoteBlobImpl::rmap() const noexcept {
LockedMemory<const void> RemoteBlobImpl::rmap() const noexcept {
lock();
return LockedMemory<const void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
}
LockedMemory<void> CLDNNRemoteBlobImpl::wmap()noexcept {
LockedMemory<void> RemoteBlobImpl::wmap()noexcept {
lock();
return LockedMemory<void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
}
void CLDNNRemoteAllocator::regLockedBlob(void* handle, const CLDNNRemoteBlobImpl* blob) {
void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) {
acquire_lock();
auto iter = m_lockedBlobs.find(handle);
if (iter == m_lockedBlobs.end()) {
@ -216,7 +218,7 @@ void CLDNNRemoteAllocator::regLockedBlob(void* handle, const CLDNNRemoteBlobImpl
release_lock();
}
void CLDNNRemoteAllocator::unlock(void* handle) noexcept {
void RemoteAllocator::unlock(void* handle) noexcept {
acquire_lock();
auto iter = m_lockedBlobs.find(handle);
if (iter != m_lockedBlobs.end()) {
@ -226,7 +228,7 @@ void CLDNNRemoteAllocator::unlock(void* handle) noexcept {
release_lock();
}
CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
const ParamMap& params,
const Config& config) :
m_plugin(plugin),
@ -278,7 +280,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
(m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache));
auto engine_params = clDNNEngine::GetEngineParams(m_config, dev, m_external_queue);
auto engine_params = Plugin::GetParams(m_config, dev, m_external_queue);
m_engine = cldnn::engine::create(engine_params.engine_type,
engine_params.runtime_type, dev,
cldnn::engine_configuration(enable_profiling,
@ -293,7 +295,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
engine_params.task_executor);
}
ParamMap CLDNNExecutionContextImpl::getParams() const {
ParamMap ExecutionContextImpl::getParams() const {
ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } };
switch (m_type) {
@ -312,7 +314,7 @@ ParamMap CLDNNExecutionContextImpl::getParams() const {
return ret;
}
std::string CLDNNExecutionContextImpl::getDeviceName() const noexcept {
std::string ExecutionContextImpl::getDeviceName() const noexcept {
auto devName = m_plugin.lock()->GetName();
auto engine_type = cldnn::engine_types::ocl;
@ -334,4 +336,6 @@ std::string CLDNNExecutionContextImpl::getDeviceName() const noexcept {
return devName;
}
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov


@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "simple_math.h"
#include "intel_gpu/plugin/simple_math.hpp"
#include <cctype>
#include <string>
#include <set>


@ -12,7 +12,7 @@
#include <cctype>
#include <memory>
#include "cldnn_transformations_pipeline.h"
#include "intel_gpu/plugin/transformations_pipeline.hpp"
#include "ie_metric_helpers.hpp"
#include "ie_plugin_config.hpp"
@ -86,7 +86,7 @@
#include <low_precision/strided_slice.hpp>
#include <low_precision/network_helper.hpp>
#include "cldnn_itt.h"
#include "intel_gpu/plugin/itt.hpp"
namespace {
template<typename T>
@ -99,10 +99,12 @@ static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node>
}
} // namespace
namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply");
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
bool use_onednn = false;
@ -336,7 +338,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
}
if (enableInt8) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::lpt");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::lpt");
using namespace ngraph::pass::low_precision;
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
@ -421,7 +423,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
}
{
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::run_passes");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::run_passes");
ngraph::pass::Manager manager;
// This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
// TODO: check why we have these reshapes
@ -442,4 +444,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
manager.run_passes(func);
}
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov