[GPU] Plugin refactoring (#9068)
* [GPU] Plugin files renaming * [GPU] Updated plugin namespace to ov::runtime::intel_gpu * [GPU] Renamed plugin classes to get rid of cldnn prefix
This commit is contained in:
parent
ee4643d97e
commit
e04ca1516d
@ -16,14 +16,14 @@ if(ENABLE_GPU_DEBUG_CAPS)
|
||||
add_definitions(-DGPU_DEBUG_CONFIG=1)
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.h)
|
||||
file(GLOB_RECURSE PLUGIN_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/plugin/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/*.hpp)
|
||||
|
||||
addVersionDefines(src/plugin/cldnn_engine.cpp CI_BUILD_NUMBER CLDNN_VERSION)
|
||||
addVersionDefines(src/plugin/plugin.cpp CI_BUILD_NUMBER)
|
||||
|
||||
ie_add_plugin(NAME ${TARGET_NAME}
|
||||
DEVICE_NAME "GPU"
|
||||
SOURCES ${PLUGIN_SOURCES}
|
||||
VERSION_DEFINES_FOR src/plugin/cldnn_engine.cpp)
|
||||
VERSION_DEFINES_FOR src/plugin/plugin.cpp)
|
||||
|
||||
target_compile_options(${TARGET_NAME} PRIVATE
|
||||
$<$<CONFIG:Release>:$<IF:$<CXX_COMPILER_ID:MSVC>,/Os,-Os>>)
|
||||
@ -35,7 +35,7 @@ target_link_libraries(${TARGET_NAME} PRIVATE ov_intel_gpu_graph
|
||||
ngraph)
|
||||
|
||||
target_include_directories(${TARGET_NAME} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/intel_gpu/plugin/
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/
|
||||
$<TARGET_PROPERTY:inference_engine_transformations,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
|
||||
|
@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
|
||||
#include "intel_gpu/plugin/infer_request.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
|
||||
public:
|
||||
using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
|
||||
AsyncInferRequest(const InferRequest::Ptr &inferRequest,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
|
||||
|
||||
~AsyncInferRequest();
|
||||
|
||||
void Infer_ThreadUnsafe() override;
|
||||
void StartAsync_ThreadUnsafe() override;
|
||||
|
||||
private:
|
||||
InferRequest::Ptr _inferRequest;
|
||||
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
|
||||
};
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -1,32 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
|
||||
#include "cldnn_infer_request.h"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
|
||||
class CLDNNAsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault {
|
||||
public:
|
||||
using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault;
|
||||
CLDNNAsyncInferRequest(const CLDNNInferRequest::Ptr &inferRequest,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor);
|
||||
|
||||
~CLDNNAsyncInferRequest();
|
||||
|
||||
void Infer_ThreadUnsafe() override;
|
||||
void StartAsync_ThreadUnsafe() override;
|
||||
|
||||
private:
|
||||
CLDNNInferRequest::Ptr _inferRequest;
|
||||
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
|
||||
};
|
||||
|
||||
} // namespace CLDNNPlugin
|
@ -1,20 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines openvino domains for tracing
|
||||
* @file cldnn_itt.h
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(CLDNNPlugin);
|
||||
}
|
||||
}
|
||||
}
|
@ -9,11 +9,13 @@
|
||||
|
||||
#include "ngraph/type/element_type.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
|
||||
|
||||
const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def = 1) {
|
||||
inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, int def = 1) {
|
||||
switch (dims.size()) {
|
||||
case 0: return cldnn::tensor(cldnn::batch(def), cldnn::feature(def), cldnn::spatial(def, def));
|
||||
case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
|
||||
@ -22,9 +24,9 @@ const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, i
|
||||
case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
|
||||
case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
|
||||
case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
|
||||
default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for clDNN tensor";
|
||||
default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for gpu tensor";
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
|
||||
switch (p) {
|
||||
@ -185,4 +187,6 @@ inline std::vector<uint16_t> ConvertPermuteOrder(const std::vector<uint16_t>& ie
|
||||
return cldnn_order;
|
||||
}
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -13,17 +13,19 @@
|
||||
#include "ie_blob.h"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
|
||||
#include "cldnn_graph.h"
|
||||
#include "cldnn_config.h"
|
||||
#include "cldnn_remote_context.h"
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include "intel_gpu/plugin/device_config.hpp"
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
class CLDNNExecNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
|
||||
class CompiledModel : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
|
||||
public:
|
||||
typedef std::shared_ptr<CLDNNExecNetwork> Ptr;
|
||||
typedef std::shared_ptr<CompiledModel> Ptr;
|
||||
|
||||
CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
|
||||
CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config);
|
||||
|
||||
std::shared_ptr<ngraph::Function> GetExecGraphInfo() override;
|
||||
InferenceEngine::IInferRequestInternal::Ptr CreateInferRequest() override;
|
||||
@ -36,11 +38,13 @@ public:
|
||||
InferenceEngine::Parameter GetConfig(const std::string &name) const override;
|
||||
std::shared_ptr<InferenceEngine::RemoteContext> GetContext() const override;
|
||||
|
||||
std::vector<std::shared_ptr<CLDNNGraph>> m_graphs;
|
||||
std::vector<std::shared_ptr<Graph>> m_graphs;
|
||||
InferenceEngine::gpu::ClContext::Ptr m_context;
|
||||
Config m_config;
|
||||
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
|
||||
InferenceEngine::ITaskExecutor::Ptr m_waitExecutor;
|
||||
};
|
||||
|
||||
}; // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -12,15 +12,17 @@
|
||||
#include "pugixml.hpp"
|
||||
#include "intel_gpu/runtime/tensor.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
using CLDNNCustomLayerPtr = std::shared_ptr<class CLDNNCustomLayer>;
|
||||
using CLDNNCustomLayerMap = std::map<std::string, CLDNNCustomLayerPtr>;
|
||||
class CLDNNCustomLayer{
|
||||
using CustomLayerPtr = std::shared_ptr<class CustomLayer>;
|
||||
using CustomLayerMap = std::map<std::string, CustomLayerPtr>;
|
||||
class CustomLayer{
|
||||
public:
|
||||
static void LoadFromFile(
|
||||
const std::string configFile,
|
||||
CLDNNCustomLayerMap& customLayers,
|
||||
CustomLayerMap& customLayers,
|
||||
bool can_be_missed = false);
|
||||
|
||||
typedef enum {
|
||||
@ -57,8 +59,8 @@ public:
|
||||
int InputDimSourceIndex() { return m_wgDimInputIdx; }
|
||||
|
||||
protected:
|
||||
CLDNNCustomLayer() : m_wgDimInputIdx(0) {}
|
||||
explicit CLDNNCustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
|
||||
CustomLayer() : m_wgDimInputIdx(0) {}
|
||||
explicit CustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
|
||||
|
||||
bool Error() const { return m_ErrorMessage.length() > 0; }
|
||||
void LoadSingleLayer(const pugi::xml_node& node);
|
||||
@ -82,4 +84,6 @@ protected:
|
||||
std::string m_ErrorMessage;
|
||||
};
|
||||
|
||||
}; // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -7,12 +7,14 @@
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "cldnn_custom_layer.h"
|
||||
#include "intel_gpu/plugin/custom_layer.hpp"
|
||||
#include <ie_performance_hints.hpp>
|
||||
#include "intel_gpu/graph/network.hpp"
|
||||
#include <threading/ie_cpu_streams_executor.hpp>
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
struct Config {
|
||||
Config(std::string device_id = "0") : device_id(device_id),
|
||||
@ -61,7 +63,7 @@ struct Config {
|
||||
cldnn::priority_mode_types queuePriority;
|
||||
cldnn::throttle_mode_types queueThrottle;
|
||||
int max_dynamic_batch;
|
||||
CLDNNCustomLayerMap customLayers;
|
||||
CustomLayerMap customLayers;
|
||||
cldnn::tuning_config_options tuningConfig;
|
||||
std::string graph_dumps_dir;
|
||||
std::string sources_dumps_dir;
|
||||
@ -93,4 +95,6 @@ private:
|
||||
std::map<std::string, Config> configs;
|
||||
};
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -21,24 +21,26 @@
|
||||
#include "intel_gpu/graph/topology.hpp"
|
||||
|
||||
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
|
||||
#include "cldnn_custom_layer.h"
|
||||
#include "cldnn_config.h"
|
||||
#include "cldnn_remote_context.h"
|
||||
#include "cldnn_program.h"
|
||||
#include "intel_gpu/plugin/custom_layer.hpp"
|
||||
#include "intel_gpu/plugin/device_config.hpp"
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
class CLDNNGraph {
|
||||
class Graph {
|
||||
public:
|
||||
enum class Stage : uint32_t {
|
||||
PREPROC = 1,
|
||||
EXECUTE = 2,
|
||||
POSTPROC = 4
|
||||
};
|
||||
typedef std::shared_ptr<CLDNNGraph> Ptr;
|
||||
typedef std::shared_ptr<Graph> Ptr;
|
||||
|
||||
CLDNNGraph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
|
||||
explicit CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id = 0);
|
||||
Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
|
||||
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
|
||||
std::shared_ptr<ngraph::Function> GetExecGraphInfo();
|
||||
|
||||
bool IsLoaded() const;
|
||||
@ -102,4 +104,6 @@ protected:
|
||||
bool filter_const_primitives = true);
|
||||
};
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -9,21 +9,23 @@
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
#include "cldnn_graph.h"
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include <threading/ie_istreams_executor.hpp>
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
struct buf_info {
|
||||
size_t buf_offset;
|
||||
size_t buf_size;
|
||||
};
|
||||
|
||||
class CLDNNExecNetwork;
|
||||
class CompiledModel;
|
||||
|
||||
class CLDNNInferRequest : public InferenceEngine::IInferRequestInternal {
|
||||
class InferRequest : public InferenceEngine::IInferRequestInternal {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<CLDNNInferRequest>;
|
||||
using Ptr = std::shared_ptr<InferRequest>;
|
||||
// make sure all blobs and cldnn::memory objects
|
||||
// are in place and valid
|
||||
void checkBlobs() override;
|
||||
@ -31,21 +33,21 @@ public:
|
||||
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> GetPerformanceCounts() const override;
|
||||
|
||||
CLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs,
|
||||
const std::shared_ptr<CLDNNExecNetwork>& execNetwork);
|
||||
CLDNNInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
|
||||
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
|
||||
const std::shared_ptr<CLDNNExecNetwork>& execNetwork);
|
||||
InferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs,
|
||||
const std::shared_ptr<CompiledModel>& execNetwork);
|
||||
InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
|
||||
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
|
||||
const std::shared_ptr<CompiledModel>& execNetwork);
|
||||
|
||||
CLDNNInferRequest(const CLDNNInferRequest &) = delete;
|
||||
InferRequest(const InferRequest &) = delete;
|
||||
|
||||
virtual ~CLDNNInferRequest() = default;
|
||||
virtual ~InferRequest() = default;
|
||||
|
||||
InferenceEngine::Blob::Ptr GetBlob(const std::string& name) override;
|
||||
void SetBlob(const std::string& name, const InferenceEngine::Blob::Ptr &data) override;
|
||||
|
||||
void SetBatch(int batch = -1) override;
|
||||
void SetGraph(std::shared_ptr<CLDNNGraph> graph);
|
||||
void SetGraph(std::shared_ptr<Graph> graph);
|
||||
void EnableProfiling() { m_useProfiling = true; }
|
||||
void EnableStreams() { m_useStreams = true; }
|
||||
|
||||
@ -73,7 +75,7 @@ private:
|
||||
bool m_useProfiling = false;
|
||||
bool m_useStreams = false;
|
||||
bool m_useExternalQueue = false;
|
||||
std::shared_ptr<CLDNNGraph> m_graph;
|
||||
std::shared_ptr<Graph> m_graph;
|
||||
|
||||
// dynamic batch stuff
|
||||
std::map<std::string, std::vector<buf_info>> batchInputs;
|
||||
@ -102,4 +104,6 @@ private:
|
||||
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
|
||||
};
|
||||
|
||||
}; // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
24
src/plugins/intel_gpu/include/intel_gpu/plugin/itt.hpp
Normal file
24
src/plugins/intel_gpu/include/intel_gpu/plugin/itt.hpp
Normal file
@ -0,0 +1,24 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines openvino domains for tracing
|
||||
* @file itt.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
namespace itt {
|
||||
namespace domains {
|
||||
OV_ITT_DOMAIN(intel_gpu_plugin);
|
||||
} // namespace domains
|
||||
} // namespace itt
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -10,14 +10,16 @@
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
||||
#include "cldnn_remote_context.h"
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
using CLDNNCustomLayerPtr = std::shared_ptr<class CLDNNCustomLayer>;
|
||||
using CustomLayerPtr = std::shared_ptr<class CustomLayer>;
|
||||
|
||||
class clDNNEngine : public InferenceEngine::IInferencePlugin,
|
||||
public InferenceEngine::gpu::details::param_map_obj_getter {
|
||||
class Plugin : public InferenceEngine::IInferencePlugin,
|
||||
public InferenceEngine::gpu::details::param_map_obj_getter {
|
||||
struct impl;
|
||||
std::shared_ptr<impl> _impl;
|
||||
bool streamsSet = false;
|
||||
@ -26,23 +28,23 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin,
|
||||
// key: device_id, value: cldnn device
|
||||
std::map<std::string, cldnn::device::ptr> device_map;
|
||||
// key: cldnn context, value: memory statistics
|
||||
mutable std::map<CLDNNRemoteCLContext::Ptr, std::map<std::string, uint64_t>> statistics_map;
|
||||
mutable std::map<RemoteCLContext::Ptr, std::map<std::string, uint64_t>> statistics_map;
|
||||
mutable std::mutex engine_mutex;
|
||||
|
||||
mutable CLDNNRemoteCLContext::Ptr m_defaultContext;
|
||||
mutable RemoteCLContext::Ptr m_defaultContext;
|
||||
|
||||
cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
|
||||
InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
|
||||
const CLDNNPlugin::Config& config) const;
|
||||
const Config& config) const;
|
||||
|
||||
std::map<std::string, std::string> ConvertPerfHintsToConfig(const std::map<std::string, std::string>& network_config,
|
||||
const CLDNNPlugin::Config& plugin_config) const;
|
||||
const Config& plugin_config) const;
|
||||
|
||||
void RegisterPrimitives();
|
||||
void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> ¶ms) const;
|
||||
void UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const;
|
||||
void UpdateStatistics(const RemoteCLContext::Ptr& context) const;
|
||||
public:
|
||||
clDNNEngine();
|
||||
Plugin();
|
||||
|
||||
InferenceEngine::IExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
|
||||
const std::map<std::string, std::string> &config) override;
|
||||
@ -61,7 +63,7 @@ public:
|
||||
std::shared_ptr<InferenceEngine::RemoteContext> CreateContext(const InferenceEngine::ParamMap& params) override;
|
||||
std::shared_ptr<InferenceEngine::RemoteContext> GetDefaultContext(const InferenceEngine::ParamMap& params) override;
|
||||
|
||||
struct clDNNEngineParams {
|
||||
struct PluginParams {
|
||||
cldnn::queue_types queue_type;
|
||||
cldnn::engine_types engine_type;
|
||||
cldnn::runtime_types runtime_type;
|
||||
@ -69,9 +71,9 @@ public:
|
||||
InferenceEngine::ITaskExecutor::Ptr task_executor;
|
||||
};
|
||||
|
||||
static clDNNEngineParams GetEngineParams(const Config& config, const cldnn::device::ptr& dev,
|
||||
InferenceEngine::gpu_handle_param external_queue = nullptr) {
|
||||
clDNNEngineParams params;
|
||||
static PluginParams GetParams(const Config& config, const cldnn::device::ptr& dev,
|
||||
InferenceEngine::gpu_handle_param external_queue = nullptr) {
|
||||
PluginParams params;
|
||||
params.engine_type = cldnn::engine_types::ocl;
|
||||
params.runtime_type = cldnn::runtime_types::ocl;
|
||||
if (external_queue) {
|
||||
@ -87,4 +89,6 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
}; // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -61,6 +61,7 @@ REGISTER_FACTORY(v0, Squeeze);
|
||||
REGISTER_FACTORY(v0, ShuffleChannels);
|
||||
REGISTER_FACTORY(v0, Tan);
|
||||
REGISTER_FACTORY(v0, Tanh);
|
||||
REGISTER_FACTORY(v0, TensorIterator);
|
||||
REGISTER_FACTORY(v0, Tile);
|
||||
REGISTER_FACTORY(v0, Unsqueeze);
|
||||
|
||||
@ -88,7 +89,6 @@ REGISTER_FACTORY(v0, Unsqueeze);
|
||||
// REGISTER_FACTORY(v0, Range);
|
||||
// REGISTER_FACTORY(v0, RNNCell);
|
||||
// REGISTER_FACTORY(v0, ShapeOf);
|
||||
REGISTER_FACTORY(v0, TensorIterator);
|
||||
|
||||
// ------------------------------ Supported v1 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v1, Add);
|
||||
@ -156,6 +156,7 @@ REGISTER_FACTORY(v3, EmbeddingBagOffsetsSum);
|
||||
REGISTER_FACTORY(v3, EmbeddingBagPackedSum);
|
||||
REGISTER_FACTORY(v3, EmbeddingSegmentsSum);
|
||||
REGISTER_FACTORY(v3, ExtractImagePatches);
|
||||
REGISTER_FACTORY(v3, ROIAlign);
|
||||
REGISTER_FACTORY(v3, ScatterUpdate);
|
||||
REGISTER_FACTORY(v3, ScatterElementsUpdate);
|
||||
REGISTER_FACTORY(v3, ScatterNDUpdate);
|
||||
@ -166,7 +167,6 @@ REGISTER_FACTORY(v3, ScatterNDUpdate);
|
||||
// REGISTER_FACTORY(v3, Bucketize);
|
||||
// REGISTER_FACTORY(v3, GRUCell);
|
||||
// REGISTER_FACTORY(v3, NonZero);
|
||||
REGISTER_FACTORY(v3, ROIAlign);
|
||||
// REGISTER_FACTORY(v3, ReadValue);
|
||||
// REGISTER_FACTORY(v3, ShapeOf);
|
||||
// REGISTER_FACTORY(v3, TopK);
|
@ -15,7 +15,7 @@
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/compatibility.hpp>
|
||||
|
||||
#include "cldnn_config.h"
|
||||
#include "intel_gpu/plugin/device_config.hpp"
|
||||
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
#include "intel_gpu/graph/topology.hpp"
|
||||
@ -40,7 +40,9 @@ void __register ## _ ## op_name ## _ ## op_version() {
|
||||
}); \
|
||||
}
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
std::string layer_type_lower(const ngraph::Node* op);
|
||||
std::string layer_type_name_ID(const ngraph::Node* op);
|
||||
@ -135,7 +137,7 @@ public:
|
||||
template<typename PType>
|
||||
void AddPrimitive(PType prim) {
|
||||
if (m_topology == nullptr) {
|
||||
IE_THROW() << "m_topology object was not created in clDNNPlugin::Program";
|
||||
IE_THROW() << "m_topology object was not created in ov::runtime::intel_gpu::Program";
|
||||
}
|
||||
|
||||
m_topology->add(prim);
|
||||
@ -172,11 +174,13 @@ private:
|
||||
void ChangeInputBatch(int batch);
|
||||
};
|
||||
|
||||
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& node, CLDNNCustomLayerPtr customLayer);
|
||||
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& node, CustomLayerPtr customLayer);
|
||||
void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node,
|
||||
cldnn::activation_func func, cldnn::activation_additional_params params);
|
||||
void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node, cldnn::eltwise_mode mode);
|
||||
|
||||
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -6,12 +6,13 @@
|
||||
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
#include "intel_gpu/plugin/device_config.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include <ie_parameter.hpp>
|
||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||
#include <blob_factory.hpp>
|
||||
#include <ie_remote_context.hpp>
|
||||
#include "cldnn_config.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
|
||||
#ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
@ -28,11 +29,13 @@
|
||||
#include <memory>
|
||||
#include <atomic>
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
class CLDNNRemoteAllocator;
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
class RemoteAllocator;
|
||||
|
||||
class CLDNNRemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
|
||||
friend class CLDNNRemoteAllocator;
|
||||
class RemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
|
||||
friend class RemoteAllocator;
|
||||
public:
|
||||
enum BlobType {
|
||||
BT_EMPTY,
|
||||
@ -46,13 +49,13 @@ public:
|
||||
BT_DX_BUF_SHARED,
|
||||
};
|
||||
|
||||
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
|
||||
cldnn::stream& stream,
|
||||
const cldnn::layout& layout,
|
||||
cldnn::shared_handle mem = nullptr,
|
||||
cldnn::shared_surface surf = 0,
|
||||
uint32_t plane = 0,
|
||||
BlobType mem_type = BT_BUF_INTERNAL);
|
||||
explicit RemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
|
||||
cldnn::stream& stream,
|
||||
const cldnn::layout& layout,
|
||||
cldnn::shared_handle mem = nullptr,
|
||||
cldnn::shared_surface surf = 0,
|
||||
uint32_t plane = 0,
|
||||
BlobType mem_type = BT_BUF_INTERNAL);
|
||||
|
||||
void allocate();
|
||||
bool deallocate() noexcept;
|
||||
@ -72,7 +75,7 @@ public:
|
||||
cldnn::memory::ptr getMemory() { return m_memObject; }
|
||||
|
||||
protected:
|
||||
static CLDNNRemoteAllocator m_allocator;
|
||||
static RemoteAllocator m_allocator;
|
||||
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
|
||||
cldnn::stream& m_stream;
|
||||
|
||||
@ -95,18 +98,18 @@ protected:
|
||||
};
|
||||
|
||||
template<typename TpublicAPI>
|
||||
class typedCLDNNRemoteBlob : public TpublicAPI {
|
||||
class TypedRemoteBlob : public TpublicAPI {
|
||||
public:
|
||||
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
|
||||
using Ptr = std::shared_ptr<TypedRemoteBlob>;
|
||||
|
||||
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
|
||||
cldnn::stream& stream,
|
||||
const InferenceEngine::TensorDesc& desc,
|
||||
const cldnn::layout& layout,
|
||||
cldnn::shared_handle mem = nullptr,
|
||||
cldnn::shared_surface surf = 0,
|
||||
uint32_t plane = 0,
|
||||
CLDNNRemoteBlobImpl::BlobType mem_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
|
||||
explicit TypedRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
|
||||
cldnn::stream& stream,
|
||||
const InferenceEngine::TensorDesc& desc,
|
||||
const cldnn::layout& layout,
|
||||
cldnn::shared_handle mem = nullptr,
|
||||
cldnn::shared_surface surf = 0,
|
||||
uint32_t plane = 0,
|
||||
RemoteBlobImpl::BlobType mem_type = RemoteBlobImpl::BlobType::BT_BUF_INTERNAL)
|
||||
: _impl(context, stream, layout, mem, surf, plane, mem_type)
|
||||
, TpublicAPI(desc) {}
|
||||
|
||||
@ -124,62 +127,62 @@ public:
|
||||
InferenceEngine::LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
|
||||
InferenceEngine::LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
|
||||
InferenceEngine::LockedMemory<void> wmap()noexcept override { return _impl.wmap(); }
|
||||
CLDNNRemoteBlobImpl* getImpl() { return &_impl; }
|
||||
RemoteBlobImpl* getImpl() { return &_impl; }
|
||||
|
||||
protected:
|
||||
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
|
||||
void *getHandle() const noexcept override { return _impl.getHandle(); }
|
||||
CLDNNRemoteBlobImpl _impl;
|
||||
RemoteBlobImpl _impl;
|
||||
};
|
||||
|
||||
using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
|
||||
using CLDNNRemoteUSMbuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::USMBlob>;
|
||||
using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
|
||||
using RemoteCLbuffer = TypedRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
|
||||
using RemoteUSMbuffer = TypedRemoteBlob<InferenceEngine::gpu::USMBlob>;
|
||||
using RemoteCLImage2D = TypedRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
|
||||
#ifdef _WIN32
|
||||
using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
|
||||
using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
|
||||
using RemoteD3DBuffer = TypedRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
|
||||
using RemoteD3DSurface = TypedRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
|
||||
#else
|
||||
using CLDNNRemoteVASurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
|
||||
using RemoteVASurface = TypedRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
|
||||
#endif
|
||||
|
||||
inline CLDNNRemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
|
||||
inline RemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto ptr = blobPtr->as<CLDNNRemoteD3DSurface>();
|
||||
auto ptr = blobPtr->as<RemoteD3DSurface>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
{
|
||||
auto ptr = blobPtr->as<CLDNNRemoteD3DBuffer>();
|
||||
auto ptr = blobPtr->as<RemoteD3DBuffer>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
#else
|
||||
{
|
||||
auto ptr = blobPtr->as<CLDNNRemoteVASurface>();
|
||||
auto ptr = blobPtr->as<RemoteVASurface>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
#endif
|
||||
{
|
||||
auto ptr = blobPtr->as<CLDNNRemoteCLbuffer>();
|
||||
auto ptr = blobPtr->as<RemoteCLbuffer>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
{
|
||||
auto ptr = blobPtr->as<CLDNNRemoteCLImage2D>();
|
||||
auto ptr = blobPtr->as<RemoteCLImage2D>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
{
|
||||
auto ptr = blobPtr->as<CLDNNRemoteUSMbuffer>();
|
||||
auto ptr = blobPtr->as<RemoteUSMbuffer>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
class CLDNNRemoteAllocator : public InferenceEngine::IAllocator {
|
||||
class RemoteAllocator : public InferenceEngine::IAllocator {
|
||||
protected:
|
||||
friend class CLDNNRemoteBlobImpl;
|
||||
friend class RemoteBlobImpl;
|
||||
std::atomic_flag _lock;
|
||||
std::map<void*, const CLDNNRemoteBlobImpl*> m_lockedBlobs;
|
||||
std::map<void*, const RemoteBlobImpl*> m_lockedBlobs;
|
||||
|
||||
void regLockedBlob(void* handle, const CLDNNRemoteBlobImpl* blob);
|
||||
void regLockedBlob(void* handle, const RemoteBlobImpl* blob);
|
||||
|
||||
void acquire_lock() {
|
||||
while (_lock.test_and_set(std::memory_order_acquire)) {}
|
||||
@ -190,9 +193,9 @@ protected:
|
||||
}
|
||||
|
||||
public:
|
||||
using Ptr = std::shared_ptr<CLDNNRemoteAllocator>;
|
||||
using Ptr = std::shared_ptr<RemoteAllocator>;
|
||||
|
||||
CLDNNRemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
|
||||
RemoteAllocator() { _lock.clear(std::memory_order_relaxed); }
|
||||
/**
|
||||
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
|
||||
* @return Generic pointer to memory
|
||||
@ -269,19 +272,19 @@ public:
|
||||
};
|
||||
|
||||
|
||||
class CLDNNExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
|
||||
class ExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
|
||||
public:
|
||||
enum ContextType {
|
||||
OCL,
|
||||
DEV_SHARED
|
||||
};
|
||||
|
||||
using Ptr = std::shared_ptr<CLDNNExecutionContextImpl>;
|
||||
using CPtr = std::shared_ptr<const CLDNNExecutionContextImpl>;
|
||||
using Ptr = std::shared_ptr<ExecutionContextImpl>;
|
||||
using CPtr = std::shared_ptr<const ExecutionContextImpl>;
|
||||
|
||||
explicit CLDNNExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
|
||||
const InferenceEngine::ParamMap& params,
|
||||
const Config& config = {});
|
||||
explicit ExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
|
||||
const InferenceEngine::ParamMap& params,
|
||||
const Config& config = {});
|
||||
|
||||
InferenceEngine::ParamMap getParams() const;
|
||||
std::string getDeviceName() const noexcept;
|
||||
@ -313,7 +316,7 @@ protected:
|
||||
};
|
||||
|
||||
template<typename TpublicContextAPI>
|
||||
class typedCLDNNExecutionContext : public TpublicContextAPI {
|
||||
class TypedExecutionContext : public TpublicContextAPI {
|
||||
template<typename T1, typename T2>
|
||||
struct _Key {
|
||||
T1 _surf;
|
||||
@ -357,17 +360,17 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
|
||||
// unlickily, not found - create new and insert into registry
|
||||
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
|
||||
ImageFormatFromLayout(tensorDesc.getLayout()),
|
||||
CldnnTensorFromIEDims(tensorDesc.getDims()));
|
||||
tensor_from_dims(tensorDesc.getDims()));
|
||||
auto smart_this =
|
||||
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
|
||||
#ifdef _WIN32
|
||||
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this, stream,
|
||||
ret = std::make_shared<RemoteD3DSurface>(smart_this, stream,
|
||||
tensorDesc, layout, mem, 0, plane,
|
||||
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
||||
RemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
||||
#else
|
||||
ret = std::make_shared<CLDNNRemoteVASurface>(smart_this, stream,
|
||||
ret = std::make_shared<RemoteVASurface>(smart_this, stream,
|
||||
tensorDesc, layout, nullptr, surf, plane,
|
||||
CLDNNRemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
||||
RemoteBlobImpl::BlobType::BT_SURF_SHARED);
|
||||
#endif
|
||||
shared_surf_reg[skey] = ret;
|
||||
}
|
||||
@ -378,7 +381,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
|
||||
|
||||
InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc,
|
||||
cldnn::shared_handle mem,
|
||||
CLDNNRemoteBlobImpl::BlobType blob_type) {
|
||||
RemoteBlobImpl::BlobType blob_type) {
|
||||
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
|
||||
|
||||
_impl.acquire_lock();
|
||||
@ -392,24 +395,24 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
|
||||
// unlickily, not found - create new and insert into registry
|
||||
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
|
||||
FormatFromLayout(tensorDesc.getLayout()),
|
||||
CldnnTensorFromIEDims(tensorDesc.getDims()));
|
||||
tensor_from_dims(tensorDesc.getDims()));
|
||||
auto smart_this =
|
||||
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
|
||||
|
||||
switch (blob_type) {
|
||||
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
|
||||
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
case RemoteBlobImpl::BlobType::BT_BUF_SHARED:
|
||||
ret = std::make_shared<RemoteCLbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
break;
|
||||
case CLDNNRemoteBlobImpl::BlobType::BT_USM_SHARED:
|
||||
ret = std::make_shared<CLDNNRemoteUSMbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
case RemoteBlobImpl::BlobType::BT_USM_SHARED:
|
||||
ret = std::make_shared<RemoteUSMbuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
break;
|
||||
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
|
||||
case RemoteBlobImpl::BlobType::BT_IMG_SHARED:
|
||||
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
|
||||
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
ret = std::make_shared<RemoteCLImage2D>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
break;
|
||||
#ifdef _WIN32
|
||||
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
|
||||
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
case RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
|
||||
ret = std::make_shared<RemoteD3DBuffer>(smart_this, stream, tensorDesc, layout, mem, 0, 0, blob_type);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@ -425,44 +428,44 @@ class typedCLDNNExecutionContext : public TpublicContextAPI {
|
||||
InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) {
|
||||
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
|
||||
FormatFromLayout(tensorDesc.getLayout()),
|
||||
CldnnTensorFromIEDims(tensorDesc.getDims()));
|
||||
tensor_from_dims(tensorDesc.getDims()));
|
||||
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
|
||||
auto& stream = _impl.GetEngine()->get_program_stream();
|
||||
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
|
||||
stream,
|
||||
tensorDesc,
|
||||
layout,
|
||||
nullptr, 0, 0,
|
||||
CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
|
||||
return std::make_shared<RemoteCLbuffer>(smart_this,
|
||||
stream,
|
||||
tensorDesc,
|
||||
layout,
|
||||
nullptr, 0, 0,
|
||||
RemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
|
||||
}
|
||||
|
||||
InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, CLDNNRemoteBlobImpl::BlobType alloc_type) {
|
||||
InferenceEngine::RemoteBlob::Ptr create_usm(const InferenceEngine::TensorDesc& tensorDesc, RemoteBlobImpl::BlobType alloc_type) {
|
||||
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
|
||||
FormatFromLayout(tensorDesc.getLayout()),
|
||||
CldnnTensorFromIEDims(tensorDesc.getDims()));
|
||||
tensor_from_dims(tensorDesc.getDims()));
|
||||
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>(this->shared_from_this());
|
||||
auto& stream = _impl.GetEngine()->get_program_stream();
|
||||
|
||||
return std::make_shared<CLDNNRemoteUSMbuffer>(smart_this,
|
||||
stream,
|
||||
tensorDesc,
|
||||
layout,
|
||||
nullptr, 0, 0,
|
||||
alloc_type);
|
||||
return std::make_shared<RemoteUSMbuffer>(smart_this,
|
||||
stream,
|
||||
tensorDesc,
|
||||
layout,
|
||||
nullptr, 0, 0,
|
||||
alloc_type);
|
||||
}
|
||||
|
||||
void check_if_shared() {
|
||||
if (GetType() != CLDNNExecutionContextImpl::ContextType::DEV_SHARED)
|
||||
if (GetType() != ExecutionContextImpl::ContextType::DEV_SHARED)
|
||||
IE_THROW() << "Shared context is required to to share this type of memory";
|
||||
}
|
||||
|
||||
public:
|
||||
using Ptr = std::shared_ptr<typedCLDNNExecutionContext>;
|
||||
using CPtr = std::shared_ptr<const typedCLDNNExecutionContext>;
|
||||
using Ptr = std::shared_ptr<TypedExecutionContext>;
|
||||
using CPtr = std::shared_ptr<const TypedExecutionContext>;
|
||||
|
||||
explicit typedCLDNNExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
|
||||
const InferenceEngine::ParamMap& params,
|
||||
const Config& config = {})
|
||||
explicit TypedExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
|
||||
const InferenceEngine::ParamMap& params,
|
||||
const Config& config = {})
|
||||
: _impl(plugin, params, config) {}
|
||||
|
||||
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
|
||||
@ -479,7 +482,7 @@ public:
|
||||
using namespace InferenceEngine;
|
||||
using InferenceEngine::gpu::details::param_map_obj_getter;
|
||||
if (params.empty()) {
|
||||
// user wants clDNN to allocate blob by itself and return handle
|
||||
// user wants plugin to allocate blob by itself and return handle
|
||||
return create_buffer(tensorDesc);
|
||||
} else {
|
||||
// user will supply shared object handle
|
||||
@ -497,25 +500,25 @@ public:
|
||||
check_if_shared();
|
||||
return reuse_surf(tensorDesc, params);
|
||||
} else if (GPU_PARAM_VALUE(USM_HOST_BUFFER) == memTypeStr) {
|
||||
return create_usm(tensorDesc, CLDNNRemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
|
||||
return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
|
||||
} else if (GPU_PARAM_VALUE(USM_DEVICE_BUFFER) == memTypeStr) {
|
||||
return create_usm(tensorDesc, CLDNNRemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
|
||||
return create_usm(tensorDesc, RemoteBlobImpl::BlobType::BT_USM_DEVICE_INTERNAL);
|
||||
} else {
|
||||
CLDNNRemoteBlobImpl::BlobType blob_type;
|
||||
RemoteBlobImpl::BlobType blob_type;
|
||||
cldnn::shared_handle mem = nullptr;
|
||||
|
||||
if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) {
|
||||
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED;
|
||||
blob_type = RemoteBlobImpl::BlobType::BT_BUF_SHARED;
|
||||
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
|
||||
} else if (GPU_PARAM_VALUE(USM_USER_BUFFER) == memTypeStr) {
|
||||
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_USM_SHARED;
|
||||
blob_type = RemoteBlobImpl::BlobType::BT_USM_SHARED;
|
||||
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
|
||||
} else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) {
|
||||
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED;
|
||||
blob_type = RemoteBlobImpl::BlobType::BT_IMG_SHARED;
|
||||
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
|
||||
#ifdef _WIN32
|
||||
} else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) {
|
||||
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
|
||||
blob_type = RemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
|
||||
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
|
||||
check_if_shared();
|
||||
#endif
|
||||
@ -529,38 +532,40 @@ public:
|
||||
}
|
||||
|
||||
Config& GetConfig() { return _impl.GetConfig(); }
|
||||
CLDNNExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
|
||||
ExecutionContextImpl::ContextType GetType() const { return _impl.GetType(); }
|
||||
|
||||
CLDNNExecutionContextImpl* getImpl() { return &_impl; }
|
||||
ExecutionContextImpl* getImpl() { return &_impl; }
|
||||
|
||||
protected:
|
||||
CLDNNExecutionContextImpl _impl;
|
||||
ExecutionContextImpl _impl;
|
||||
};
|
||||
|
||||
using CLDNNRemoteCLContext = typedCLDNNExecutionContext<InferenceEngine::gpu::ClContext>;
|
||||
using RemoteCLContext = TypedExecutionContext<InferenceEngine::gpu::ClContext>;
|
||||
#ifdef _WIN32
|
||||
using CLDNNRemoteD3DContext = typedCLDNNExecutionContext<InferenceEngine::gpu::D3DContext>;
|
||||
using RemoteD3DContext = TypedExecutionContext<InferenceEngine::gpu::D3DContext>;
|
||||
#else
|
||||
using CLDNNRemoteVAContext = typedCLDNNExecutionContext<InferenceEngine::gpu::VAContext>;
|
||||
using RemoteVAContext = TypedExecutionContext<InferenceEngine::gpu::VAContext>;
|
||||
#endif
|
||||
|
||||
inline CLDNNExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
|
||||
inline ExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
|
||||
#ifdef _WIN32
|
||||
{
|
||||
auto ptr = ctxPtr->as<CLDNNRemoteD3DContext>();
|
||||
auto ptr = ctxPtr->as<RemoteD3DContext>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
#else
|
||||
{
|
||||
auto ptr = ctxPtr->as<CLDNNRemoteVAContext>();
|
||||
auto ptr = ctxPtr->as<RemoteVAContext>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
#endif
|
||||
{
|
||||
auto ptr = ctxPtr->as<CLDNNRemoteCLContext>();
|
||||
auto ptr = ctxPtr->as<RemoteCLContext>();
|
||||
if (ptr) return ptr->getImpl();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -8,9 +8,11 @@
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
|
||||
#include "cldnn_config.h"
|
||||
#include "intel_gpu/plugin/device_config.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
class TransformationsPipeline {
|
||||
public:
|
||||
@ -23,4 +25,6 @@ private:
|
||||
cldnn::device_info device_info;
|
||||
};
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -2,21 +2,25 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_async_infer_request.h"
|
||||
#include "cldnn_itt.h"
|
||||
#include "intel_gpu/plugin/async_infer_request.hpp"
|
||||
#include "intel_gpu/plugin/itt.hpp"
|
||||
#include <memory>
|
||||
|
||||
CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const CLDNNInferRequest::Ptr &inferRequest,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
AsyncInferRequest::AsyncInferRequest(const InferRequest::Ptr &inferRequest,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& taskExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& waitExecutor,
|
||||
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor)
|
||||
: AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) {
|
||||
_pipeline = {};
|
||||
|
||||
if (!_inferRequest->use_external_queue()) {
|
||||
_pipeline.push_back({taskExecutor,
|
||||
[this] {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::PreprocessingAndStartPipeline");
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline");
|
||||
_inferRequest->setup_stream_graph();
|
||||
_inferRequest->preprocess();
|
||||
_inferRequest->enqueue();
|
||||
@ -25,13 +29,13 @@ CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const CLDNNInferRequ
|
||||
} else {
|
||||
_pipeline.push_back({ _waitExecutor,
|
||||
[this] {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNAsyncInferRequest::WaitPipeline");
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline");
|
||||
_inferRequest->wait_notify();
|
||||
} });
|
||||
}
|
||||
}
|
||||
|
||||
void CLDNNPlugin::CLDNNAsyncInferRequest::Infer_ThreadUnsafe() {
|
||||
void AsyncInferRequest::Infer_ThreadUnsafe() {
|
||||
if (_inferRequest->use_external_queue()) {
|
||||
_inferRequest->setup_stream_graph();
|
||||
_inferRequest->preprocess_notify();
|
||||
@ -40,7 +44,7 @@ void CLDNNPlugin::CLDNNAsyncInferRequest::Infer_ThreadUnsafe() {
|
||||
Parent::Infer_ThreadUnsafe();
|
||||
}
|
||||
|
||||
void CLDNNPlugin::CLDNNAsyncInferRequest::StartAsync_ThreadUnsafe() {
|
||||
void AsyncInferRequest::StartAsync_ThreadUnsafe() {
|
||||
if (_inferRequest->use_external_queue()) {
|
||||
_inferRequest->setup_stream_graph();
|
||||
_inferRequest->preprocess_notify();
|
||||
@ -49,6 +53,10 @@ void CLDNNPlugin::CLDNNAsyncInferRequest::StartAsync_ThreadUnsafe() {
|
||||
Parent::StartAsync_ThreadUnsafe();
|
||||
}
|
||||
|
||||
CLDNNPlugin::CLDNNAsyncInferRequest::~CLDNNAsyncInferRequest() {
|
||||
AsyncInferRequest::~AsyncInferRequest() {
|
||||
StopAndWait();
|
||||
}
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
@ -2,48 +2,44 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
|
||||
#include "ie_metric_helpers.hpp"
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
#include "ie_metric_helpers.hpp"
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include "cldnn_graph.h"
|
||||
#include "cldnn_itt.h"
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include "intel_gpu/plugin/itt.hpp"
|
||||
#include "intel_gpu/plugin/infer_request.hpp"
|
||||
#include "intel_gpu/plugin/compiled_model.hpp"
|
||||
#include "intel_gpu/plugin/async_infer_request.hpp"
|
||||
|
||||
#include <description_buffer.hpp>
|
||||
#include "cldnn_infer_request.h"
|
||||
#include <threading/ie_executor_manager.hpp>
|
||||
#include "cldnn_async_infer_request.h"
|
||||
#include <fstream>
|
||||
#include <utility>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "cldnn_executable_network.h"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
|
||||
#include "ie_icore.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <utility>
|
||||
#include <sys/types.h>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::shared_ptr<RemoteContext> context, Config config) :
|
||||
InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]()->InferenceEngine::ITaskExecutor::Ptr {
|
||||
CompiledModel::CompiledModel(InferenceEngine::CNNNetwork &network, std::shared_ptr<InferenceEngine::RemoteContext> context, Config config) :
|
||||
InferenceEngine::ExecutableNetworkThreadSafeDefault{[&]() -> InferenceEngine::ITaskExecutor::Ptr {
|
||||
if (config.exclusiveAsyncRequests) {
|
||||
//exclusiveAsyncRequests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
|
||||
return ExecutorManager::getInstance()->getExecutor("GPU");
|
||||
} else if (config.throughput_streams > 1) {
|
||||
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
|
||||
IStreamsExecutor::Config{"CLDNNPlugin executor", config.throughput_streams});
|
||||
IStreamsExecutor::Config{"Intel GPU plugin executor", config.throughput_streams});
|
||||
} else {
|
||||
return std::make_shared<InferenceEngine::CPUStreamsExecutor>(
|
||||
IStreamsExecutor::Config{"CLDNNPlugin executor", 1});
|
||||
IStreamsExecutor::Config{"Intel GPU plugin executor", 1});
|
||||
}
|
||||
}()},
|
||||
m_config(config),
|
||||
@ -57,18 +53,18 @@ CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, std::sh
|
||||
|
||||
m_context = casted_context;
|
||||
|
||||
auto graph_base = std::make_shared<CLDNNGraph>(network, m_context, m_config, 0);
|
||||
auto graph_base = std::make_shared<Graph>(network, m_context, m_config, 0);
|
||||
for (uint16_t n = 0; n < m_config.throughput_streams; n++) {
|
||||
auto graph = n == 0 ? graph_base : std::make_shared<CLDNNGraph>(graph_base, n);
|
||||
auto graph = n == 0 ? graph_base : std::make_shared<Graph>(graph_base, n);
|
||||
m_graphs.push_back(graph);
|
||||
}
|
||||
}
|
||||
|
||||
IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap networkInputs,
|
||||
OutputsDataMap networkOutputs) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
|
||||
auto ptr = std::make_shared<CLDNNInferRequest>(networkInputs, networkOutputs,
|
||||
std::static_pointer_cast<CLDNNExecNetwork>(shared_from_this()));
|
||||
IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(InputsDataMap networkInputs,
|
||||
OutputsDataMap networkOutputs) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
|
||||
auto ptr = std::make_shared<InferRequest>(networkInputs, networkOutputs,
|
||||
std::static_pointer_cast<CompiledModel>(shared_from_this()));
|
||||
if (m_config.throughput_streams > 1) {
|
||||
ptr->EnableStreams();
|
||||
}
|
||||
@ -82,11 +78,11 @@ IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMa
|
||||
return ptr;
|
||||
}
|
||||
|
||||
IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
|
||||
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequestImpl");
|
||||
auto ptr = std::make_shared<CLDNNInferRequest>(inputs, outputs,
|
||||
std::static_pointer_cast<CLDNNExecNetwork>(shared_from_this()));
|
||||
IInferRequestInternal::Ptr CompiledModel::CreateInferRequestImpl(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
|
||||
const std::vector<std::shared_ptr<const ov::Node>>& outputs) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequestImpl");
|
||||
auto ptr = std::make_shared<InferRequest>(inputs, outputs,
|
||||
std::static_pointer_cast<CompiledModel>(shared_from_this()));
|
||||
if (m_config.throughput_streams > 1) {
|
||||
ptr->EnableStreams();
|
||||
}
|
||||
@ -101,8 +97,8 @@ IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(const std::v
|
||||
return ptr;
|
||||
}
|
||||
|
||||
IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequest() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNExecNetwork::CreateInferRequest");
|
||||
IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CompiledModel::CreateInferRequest");
|
||||
InferenceEngine::IInferRequestInternal::Ptr internalRequest;
if (m_graphs.empty()) {
IE_THROW(NetworkNotLoaded);
@ -123,20 +119,20 @@ IInferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequest() {
if (!internalRequest)
internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
internalRequest->setPointerToExecutableNetworkInternal(shared_from_this());
return std::make_shared<CLDNNAsyncInferRequest>(std::static_pointer_cast<CLDNNInferRequest>(internalRequest),
m_taskExecutor,
m_waitExecutor,
_callbackExecutor);
return std::make_shared<AsyncInferRequest>(std::static_pointer_cast<InferRequest>(internalRequest),
m_taskExecutor,
m_waitExecutor,
_callbackExecutor);
}

std::shared_ptr<ngraph::Function> CLDNNExecNetwork::GetExecGraphInfo() {
std::shared_ptr<ngraph::Function> CompiledModel::GetExecGraphInfo() {
if (m_graphs.empty())
IE_THROW(NetworkNotLoaded);

return m_graphs.front()->GetExecGraphInfo();
}

InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name) const {
InferenceEngine::Parameter CompiledModel::GetConfig(const std::string &name) const {
auto it = m_config.key_config_map.find(name);
if (it != m_config.key_config_map.end()) {
return it->second;
@ -145,7 +141,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetConfig(const std::string &name)
}
}

InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) const {
InferenceEngine::Parameter CompiledModel::GetMetric(const std::string &name) const {
if (name == METRIC_KEY(NETWORK_NAME)) {
IE_ASSERT(!m_graphs.empty());
IE_SET_METRIC_RETURN(NETWORK_NAME, m_graphs[0]->getName());
@ -171,8 +167,10 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
}
}

std::shared_ptr<RemoteContext> CLDNNExecNetwork::GetContext() const {
std::shared_ptr<InferenceEngine::RemoteContext> CompiledModel::GetContext() const {
return m_context;
}

}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,7 +2,10 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_custom_layer.h"
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/plugin/simple_math.hpp"
#include "intel_gpu/plugin/itt.hpp"

#include "xml_parse_utils.h"
#include <description_buffer.hpp>
#include <map>
@ -14,9 +17,6 @@
# include <windows.h>
#endif

#include "simple_math.h"
#include "cldnn_itt.h"

using namespace InferenceEngine;
using namespace XMLParseUtils;

@ -29,9 +29,11 @@ using namespace XMLParseUtils;
#define CheckIntAttrAndReturnError(node, attr, value) \
CheckAndReturnError(GetIntAttr(node, attr, -1) != (value), "Wrong attribute value! expected: " << value << " found: " << GetIntAttr(node, attr, -1))

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

void CLDNNCustomLayer::LoadSingleLayer(const pugi::xml_node & node) {
void CustomLayer::LoadSingleLayer(const pugi::xml_node & node) {
// Root checks
CheckNodeTypeAndReturnError(node, "CustomLayer");
CheckStrAttrAndReturnError(node, "type", "SimpleGPU");
@ -46,7 +48,7 @@ void CLDNNCustomLayer::LoadSingleLayer(const pugi::xml_node & node) {
ProcessWorkSizesNode(node.child("WorkSizes"));
}

void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) {
void CustomLayer::ProcessKernelNode(const pugi::xml_node & node) {
CheckNodeTypeAndReturnError(node, "Kernel");
CheckAndReturnError(m_kernelSource.length() > 0, "Multiple definition of Kernel");
m_kernelEntry = GetStrAttr(node, "entry", "");
@ -89,7 +91,7 @@ void CLDNNCustomLayer::ProcessKernelNode(const pugi::xml_node & node) {
}
}

void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) {
void CustomLayer::ProcessBuffersNode(const pugi::xml_node & node) {
CheckNodeTypeAndReturnError(node, "Buffers");
FOREACH_CHILD(tensorNode, node, "Tensor") {
KerenlParam kp;
@ -120,7 +122,7 @@ void CLDNNCustomLayer::ProcessBuffersNode(const pugi::xml_node & node) {
}
}

void CLDNNCustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) {
void CustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) {
if (node.empty()) {
return; // Optional node doesn't exist
}
@ -129,7 +131,7 @@ void CLDNNCustomLayer::ProcessCompilerOptionsNode(const pugi::xml_node & node) {
m_compilerOptions = GetStrAttr(node, "options", "");
}

void CLDNNCustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) {
void CustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) {
if (node.empty()) {
return; // Optional node doesn't exist
}
@ -180,7 +182,7 @@ void CLDNNCustomLayer::ProcessWorkSizesNode(const pugi::xml_node & node) {
}
}

bool CLDNNCustomLayer::IsLegalSizeRule(const std::string & rule) {
bool CustomLayer::IsLegalSizeRule(const std::string & rule) {
SimpleMathExpression expr;
expr.SetVariables({
{ 'b', 1 }, { 'B', 1 },
@ -200,7 +202,7 @@ bool CLDNNCustomLayer::IsLegalSizeRule(const std::string & rule) {
return true;
}

cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) {
cldnn::format CustomLayer::FormatFromString(const std::string & str) {
static const std::map<std::string, cldnn::format> FormatNameToType = {
{ "BFYX" , cldnn::format::bfyx },
{ "bfyx" , cldnn::format::bfyx },
@ -224,8 +226,8 @@ cldnn::format CLDNNCustomLayer::FormatFromString(const std::string & str) {
return cldnn::format::format_num;
}

void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLayerMap& customLayers, bool can_be_missed) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNCustomLayer::LoadFromFile");
void CustomLayer::LoadFromFile(const std::string configFile, CustomLayerMap& customLayers, bool can_be_missed) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "CustomLayer::LoadFromFile");
pugi::xml_document xmlDoc;
pugi::xml_parse_result res = xmlDoc.load_file(configFile.c_str());
if (res.status != pugi::status_ok) {
@ -267,7 +269,7 @@ void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLay
}

for (auto r = xmlDoc.document_element(); r; r = r.next_sibling()) {
CLDNNCustomLayerPtr layer = std::make_shared<CLDNNCustomLayer>(CLDNNCustomLayer(dir_path));
CustomLayerPtr layer = std::make_shared<CustomLayer>(CustomLayer(dir_path));
layer->LoadSingleLayer(r);
if (layer->Error()) {
customLayers.clear();
@ -278,4 +280,6 @@ void CLDNNCustomLayer::LoadFromFile(const std::string configFile, CLDNNCustomLay
}
}

}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -6,11 +6,11 @@

#include <cldnn/cldnn_config.hpp>
#include <gpu/gpu_config.hpp>
#include "cldnn_config.h"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "ie_api.h"
#include "file_utils.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/device_config.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include <ie_system_conf.h>
#include <thread>

@ -25,7 +25,9 @@

using namespace InferenceEngine;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void createDirectory(std::string _path) {
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
@ -57,7 +59,7 @@ static int getNumberOfCores(const IStreamsExecutor::Config::PreferredCoreType co

IE_SUPPRESS_DEPRECATED_START
void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::UpdateFromMap");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::UpdateFromMap");
for (auto& kvp : configMap) {
std::string key = kvp.first;
std::string val = kvp.second;
@ -199,7 +201,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
std::istream_iterator<std::string> end;
std::vector<std::string> configFiles(begin, end);
for (auto& file : configFiles) {
CLDNNCustomLayer::LoadFromFile(file, customLayers);
CustomLayer::LoadFromFile(file, customLayers);
}
} else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) {
if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) {
@ -329,7 +331,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
}

void Config::adjustKeyMapValues() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Config::AdjustKeyMapValues");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::AdjustKeyMapValues");
if (useProfiling)
key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
else
@ -465,4 +467,6 @@ Config& Configs::GetDefaultDeviceConfig() {

IE_SUPPRESS_DEPRECATED_END

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
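For reference, the KEY_CONFIG_FILE branch shown above splits a whitespace-separated list of custom-layer files with std::istream_iterator and hands each path to CustomLayer::LoadFromFile. A minimal, self-contained sketch of that tokenization follows; the std::stringstream wrapping and the file names are assumptions, since the stream setup sits outside the lines shown in this hunk.

    #include <iostream>
    #include <iterator>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
        // Hypothetical CONFIG_FILE value: a whitespace-separated list of paths.
        std::string val = "custom_layers_a.xml custom_layers_b.xml";
        std::stringstream ss(val);                          // assumed stream setup
        std::istream_iterator<std::string> begin(ss), end;  // mirrors the iterators above
        std::vector<std::string> configFiles(begin, end);
        // Each entry would then be passed to CustomLayer::LoadFromFile(file, customLayers).
        for (const auto& file : configFiles)
            std::cout << file << "\n";
        return 0;
    }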
@ -6,10 +6,11 @@
#include "intel_gpu/runtime/profiling.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

#include "cldnn_graph.h"
#include "simple_math.h"
#include "intel_gpu/plugin/graph.hpp"
#include "intel_gpu/plugin/simple_math.hpp"
#include <cldnn/cldnn_config.hpp>
#include "cldnn_infer_request.h"
#include "intel_gpu/plugin/infer_request.hpp"
#include "intel_gpu/plugin/itt.hpp"

#include <description_buffer.hpp>
#include <threading/ie_executor_manager.hpp>
@ -33,14 +34,15 @@
#include <ie_ngraph_utils.hpp>
#include <ngraph/variant.hpp>
#include <ngraph/ngraph.hpp>
#include "cldnn_itt.h"

using namespace InferenceEngine;
using namespace InferenceEngine::details;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
Graph::Graph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id)
: m_context(context)
, m_networkName(network.getName())
, m_config(config)
@ -50,7 +52,7 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr
Build();
}

CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
Graph::Graph(std::shared_ptr<Graph> graph, uint16_t stream_id)
: m_context(graph->m_context)
, m_program(graph->m_program)
, m_networkName(graph->m_networkName)
@ -60,8 +62,8 @@ CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id)
Build();
}

void CLDNNGraph::UpdateLayersMaps() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdateLayersMaps");
void Graph::UpdateLayersMaps() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdateLayersMaps");
primitiveIDs = m_program->primitiveIDs;
prevPrimitiveIDs = m_program->prevPrimitiveIDs;
profilingIDs = m_program->profilingIDs;
@ -69,8 +71,8 @@ void CLDNNGraph::UpdateLayersMaps() {
outputDims = m_program->outputDims;
}

void CLDNNGraph::Build() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::Build");
void Graph::Build() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::Build");
UpdateLayersMaps();

if (GetMaxDynamicBatchSize() > 1) {
@ -92,13 +94,13 @@ void CLDNNGraph::Build() {
}
}

bool CLDNNGraph::use_external_queue() const {
bool Graph::use_external_queue() const {
auto impl = getContextImpl(m_context);
return impl->GetExternalQueue() != nullptr;
}

std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::BuildNetwork");
std::shared_ptr<cldnn::network> Graph::BuildNetwork(std::shared_ptr<cldnn::program> program) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::BuildNetwork");
std::shared_ptr<cldnn::network> network = nullptr;

auto impl = getContextImpl(m_context);
@ -129,9 +131,9 @@ std::shared_ptr<cldnn::network> CLDNNGraph::BuildNetwork(std::shared_ptr<cldnn::
return network;
}

std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetExecGraphInfoByPrimitivesInfo");
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo");
if (m_config.useProfiling) {
try {
// Update may throw an exception for step-by-step runtime graph dump,
@ -289,7 +291,7 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
auto desc_from_layout = [&](cldnn::layout layout) -> TensorDesc {
Precision precision = data_type_to_precision(layout.data_type);
SizeVector dims;
Layout l = Layout::NCHW;
auto l = InferenceEngine::Layout::NCHW;
auto size = layout.size;
if (layout.format.dimension() == 4) {
dims = {static_cast<size_t>(size.batch[0]),
@ -302,7 +304,7 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
static_cast<size_t>(size.spatial[2]),
static_cast<size_t>(size.spatial[1]),
static_cast<size_t>(size.spatial[0])};
l = Layout::NCDHW;
l = InferenceEngine::Layout::NCDHW;
} else if (layout.format.dimension() == 6) {
dims = {static_cast<size_t>(size.batch[0]),
static_cast<size_t>(size.feature[0]),
@ -311,7 +313,7 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
static_cast<size_t>(size.spatial[1]),
static_cast<size_t>(size.spatial[0])};
// Should be NC?DHW but there is no such layout yet
l = Layout::BLOCKED;
l = InferenceEngine::Layout::BLOCKED;
}
TensorDesc dst{precision, dims, l};
return dst;
@ -465,14 +467,14 @@ std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(s
return std::make_shared<ngraph::Function>(results, params, "runtime_gpu_graph");
}

std::shared_ptr<ngraph::Function> CLDNNGraph::GetExecGraphInfo() {
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfo() {
auto primitives_info = GetNetwork()->get_primitives_info();
return GetExecGraphInfoByPrimitivesInfo(primitives_info, true);
}


void CLDNNGraph::UpdatePerfStatistics() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::UpdatePerfStatistics");
void Graph::UpdatePerfStatistics() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::UpdatePerfStatistics");
if (GetNetworksCount() == 0) {
return;
}
@ -539,12 +541,12 @@ void CLDNNGraph::UpdatePerfStatistics() {
}
}

bool CLDNNGraph::IsLoaded() const {
bool Graph::IsLoaded() const {
return GetNetwork() != nullptr;
}

std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNGraph::GetPerformanceCounts");
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> Graph::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetPerformanceCounts");
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> result;
bool combinePrimByIRLayers = false;
unsigned i = 0;
@ -624,7 +626,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
return true;
};

// Step 1. Get all primitives in execution order which was added by clDNNPlugin
// Step 1. Get all primitives in execution order which was added by GPU plugin
for (auto& primId : profilingIDs) {
getFromProfiling(primId);
}
@ -693,7 +695,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
}
}

// Step 3. Checking primitives which has been deleted from execution order but added by clDNNPlugin
// Step 3. Checking primitives which has been deleted from execution order but added by GPU plugin
for (auto& primId : profilingIDs) {
if (std::find(allIds.begin(), allIds.end(), primId) == allIds.end()) {
getFromProfiling(primId);
@ -718,7 +720,7 @@ std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> CLDNNGraph::G
return result;
}

std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
std::shared_ptr<cldnn::network> Graph::GetNetwork(size_t idx) const {
if (idx >= GetNetworksCount())
IE_THROW() << "Unable to find network with id=" << idx << ". Stored networks count: " << GetNetworksCount();

@ -726,7 +728,7 @@ std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
}


std::string CLDNNGraph::MapOutputName(std::string outName) const {
std::string Graph::MapOutputName(std::string outName) const {
auto networkOutputsIDs = GetNetwork()->get_output_ids();
auto allPrimitiveIds = GetNetwork()->get_all_primitives();

@ -751,7 +753,7 @@ std::string CLDNNGraph::MapOutputName(std::string outName) const {
return outputID;
}

InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const {
InferenceEngine::SizeVector Graph::GetOutputSize(std::string outName) const {
auto res_output = outputDims.find(outName);

InferenceEngine::SizeVector sz;
@ -763,4 +765,6 @@ InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const
return sz;
}

}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -8,10 +8,10 @@
#include <functional>
#include <utility>
#include <description_buffer.hpp>
#include "cldnn_infer_request.h"
#include "cldnn_remote_context.h"
#include "cldnn_executable_network.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/infer_request.hpp"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include <ie_algorithm.hpp>
#include <debug.h>
@ -47,7 +47,7 @@ void copyToFloat(float* dst, const InferenceEngine::Blob* src) {
}

template<typename T>
void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, CLDNNPlugin::buf_info* bi, cldnn::stream& stream) {
void copyResultToOutputBlob(cldnn::memory::ptr src, Blob::Ptr dst, ov::runtime::intel_gpu::buf_info* bi, cldnn::stream& stream) {
size_t n = (bi == nullptr) ? dst->size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;

@ -89,7 +89,7 @@ inline void checkAlloc(const Blob::Ptr& blob, const std::string& err_str) {
if (!blob->is<gpu::ClBlob>()) {
not_allocated = (blob->buffer() == nullptr);
} else {
not_allocated = !CLDNNPlugin::getBlobImpl(blob->as<gpu::ClBlob>())->is_allocated();
not_allocated = !ov::runtime::intel_gpu::getBlobImpl(blob->as<gpu::ClBlob>())->is_allocated();
}
if (not_allocated) {
IE_THROW(NotAllocated) << err_str;
@ -173,14 +173,16 @@ bool same_host_mem(cldnn::memory::ptr memPtr, uint8_t* hostPtr) {

} // namespace

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {


// ----------------------------------------------------------------------------------------- //
// ---------------------------- IE API impl ------------------------------------------------ //
// ----------------------------------------------------------------------------------------- //
Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetBlob");
Blob::Ptr InferRequest::GetBlob(const std::string& name) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetBlob");
Blob::Ptr data;
InputInfo::Ptr foundInput;
DataPtr foundOutput;
@ -202,8 +204,8 @@ Blob::Ptr CLDNNInferRequest::GetBlob(const std::string& name) {
return data;
}

void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBlob");
void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBlob");

// perform all common checks first
if (name.empty()) {
@ -339,8 +341,8 @@ void CLDNNInferRequest::SetBlob(const std::string& name, const Blob::Ptr& data)
}
}

void CLDNNInferRequest::checkBlobs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::checkBlobs");
void InferRequest::checkBlobs() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::checkBlobs");
for (auto const &input : _inputs) {
InputInfo::Ptr foundInput = nullptr;
auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs),
@ -369,8 +371,8 @@ void CLDNNInferRequest::checkBlobs() {
}
}

void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetGraph");
void InferRequest::SetGraph(std::shared_ptr<Graph> graph) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetGraph");
m_graph = graph;

if (m_graph == nullptr) {
@ -387,8 +389,8 @@ void CLDNNInferRequest::SetGraph(std::shared_ptr<CLDNNPlugin::CLDNNGraph> graph)
}
}

void CLDNNInferRequest::SetBatch(int new_batch) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::SetBatch");
void InferRequest::SetBatch(int new_batch) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::SetBatch");
if (m_graph->GetMaxDynamicBatchSize() < 0)
IE_THROW() << "Dynamic batch is not enabled.";

@ -456,16 +458,16 @@ void CLDNNInferRequest::SetBatch(int new_batch) {
m_curBatch = new_batch;
}

CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs,
const CLDNNExecNetwork::Ptr& execNetwork)
InferRequest::InferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs,
const CompiledModel::Ptr& execNetwork)
: IInferRequestInternal(networkInputs, networkOutputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
}

CLDNNInferRequest::CLDNNInferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
InferRequest::InferRequest(const std::vector<std::shared_ptr<const ov::Node>>& inputs,
const std::vector<std::shared_ptr<const ov::Node>>& outputs,
const CLDNNExecNetwork::Ptr& execNetwork)
const CompiledModel::Ptr& execNetwork)
: IInferRequestInternal(inputs, outputs) {
IE_ASSERT(nullptr != execNetwork);
streamExecutor = dynamic_cast<InferenceEngine::IStreamsExecutor*>(execNetwork->m_taskExecutor.get());
@ -474,17 +476,17 @@ CLDNNInferRequest::CLDNNInferRequest(const std::vector<std::shared_ptr<const ov:
// ----------------------------------------------------------------------------------------- //
// ---------------------------- internal pipeline stages ----------------------------------- //
// ----------------------------------------------------------------------------------------- //
void CLDNNInferRequest::preprocess_notify() {
m_graph->wait(CLDNNGraph::Stage::PREPROC);
void InferRequest::preprocess_notify() {
m_graph->wait(Graph::Stage::PREPROC);
if (m_graph->GetMaxDynamicBatchSize() > 1) {
preprocess_dynamic();
} else {
execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP
}
m_graph->notify(CLDNNGraph::Stage::PREPROC);
m_graph->notify(Graph::Stage::PREPROC);
}

void CLDNNInferRequest::preprocess() {
void InferRequest::preprocess() {
if (m_graph->GetMaxDynamicBatchSize() > 1) {
preprocess_dynamic();
} else {
@ -492,12 +494,12 @@ void CLDNNInferRequest::preprocess() {
}
}

void CLDNNInferRequest::enqueue_notify() {
m_graph->wait(CLDNNGraph::Stage::EXECUTE);
void InferRequest::enqueue_notify() {
m_graph->wait(Graph::Stage::EXECUTE);
enqueue();
}

void CLDNNInferRequest::enqueue() {
void InferRequest::enqueue() {
if (m_graph->GetMaxDynamicBatchSize() > 1) {
enqueue_dynamic();
return;
@ -546,12 +548,12 @@ void CLDNNInferRequest::enqueue() {
internal_outputs = m_graph->GetNetwork()->execute(dependencies);
}

void CLDNNInferRequest::wait_notify() {
void InferRequest::wait_notify() {
wait();
m_graph->notify(CLDNNGraph::Stage::EXECUTE);
m_graph->notify(Graph::Stage::EXECUTE);
}

void CLDNNInferRequest::wait() {
void InferRequest::wait() {
if (m_graph->GetMaxDynamicBatchSize() > 1) {
wait_dynamic();
return;
@ -588,12 +590,12 @@ void CLDNNInferRequest::wait() {
}
}

void CLDNNInferRequest::preprocess_dynamic() {
void InferRequest::preprocess_dynamic() {
// execute input pre-processing.
execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP
}

void CLDNNInferRequest::enqueue_dynamic() {
void InferRequest::enqueue_dynamic() {
internal_outputs_dynamic.clear();
auto numNets = m_graph->GetNetworksCount();
internal_outputs_dynamic.resize(numNets);
@ -616,7 +618,7 @@ void CLDNNInferRequest::enqueue_dynamic() {
}
}

void CLDNNInferRequest::wait_dynamic() {
void InferRequest::wait_dynamic() {
if (internal_outputs_dynamic.empty()) {
IE_THROW() << "Inference was not started!\n";
}
@ -640,9 +642,9 @@ void CLDNNInferRequest::wait_dynamic() {
// ----------------------------------------------------------------------------------------- //
// ---------------------------- internal utils --------- ----------------------------------- //
// ----------------------------------------------------------------------------------------- //
void CLDNNInferRequest::setup_stream_graph() {
void InferRequest::setup_stream_graph() {
int streamID = 0;
auto& streamGraphs = static_cast<CLDNNExecNetwork*>(_exeNetwork.get())->m_graphs;
auto& streamGraphs = static_cast<CompiledModel*>(_exeNetwork.get())->m_graphs;
if (nullptr != streamExecutor) {
streamID = streamExecutor->GetStreamId();
int numGraphs = streamGraphs.size();
@ -651,8 +653,8 @@ void CLDNNInferRequest::setup_stream_graph() {
m_graph = streamGraphs[streamID];
}

Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::create_host_blob");
Blob::Ptr InferRequest::create_host_blob(const TensorDesc& desc, uint8_t* mem_ptr) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::create_host_blob");
const Precision& p = desc.getPrecision();

switch (p) {
@ -706,8 +708,8 @@ Blob::Ptr CLDNNInferRequest::create_host_blob(const TensorDesc& desc, uint8_t* m
}
}

void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_output_data");
void InferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_output_data");
auto& stream = m_graph->GetNetwork()->get_stream();
switch (dst->getTensorDesc().getPrecision()) {
case Precision::FP32: copyResultToOutputBlob<float>(src, dst, bi, stream); break;
@ -720,11 +722,11 @@ void CLDNNInferRequest::copy_output_data(cldnn::memory::ptr src, Blob::Ptr dst,
}
}

void CLDNNInferRequest::copy_input_data(std::shared_ptr<cldnn::network> network,
void InferRequest::copy_input_data(std::shared_ptr<cldnn::network> network,
const cldnn::primitive_id &inputName,
const cldnn::layout& inputLayout,
const Blob &inputBlob, buf_info* bi) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::copy_input_data");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::copy_input_data");

size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;

@ -771,7 +773,7 @@ void CLDNNInferRequest::copy_input_data(std::shared_ptr<cldnn::network> network,
}
}

Blob::Ptr CLDNNInferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) {
Blob::Ptr InferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) {
uint8_t* bufferMem = nullptr;
auto clblobPtr = std::dynamic_pointer_cast<InferenceEngine::gpu::ClBlob>(blobPtr);
if (clblobPtr) {
@ -786,8 +788,8 @@ Blob::Ptr CLDNNInferRequest::host_blob_from_device_blob(Blob::Ptr blobPtr) {
return hostBlob;
}

void CLDNNInferRequest::allocate_inputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs");
void InferRequest::allocate_inputs() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs");
auto inputLayouts = m_graph->GetInputLayouts();
// allocate inputs
for (auto& ni : _networkInputs) {
@ -823,8 +825,8 @@ void CLDNNInferRequest::allocate_inputs() {
}
}

void CLDNNInferRequest::allocate_inputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_inputs_dynamic");
void InferRequest::allocate_inputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_inputs_dynamic");
// allocate inputs
for (auto &input : m_graph->GetInputLayouts()) {
InputInfo::Ptr ni = _networkInputs.at(input.first);
@ -849,8 +851,8 @@ void CLDNNInferRequest::allocate_inputs_dynamic() {
}
}

void CLDNNInferRequest::allocate_outputs() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs");
void InferRequest::allocate_outputs() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs");
// allocate outputs
for (auto& no : _networkOutputs) {
std::string outputID = m_graph->MapOutputName(no.first);
@ -868,8 +870,8 @@ void CLDNNInferRequest::allocate_outputs() {
}
}

void CLDNNInferRequest::allocate_outputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::allocate_outputs_dynamic");
void InferRequest::allocate_outputs_dynamic() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::allocate_outputs_dynamic");
// allocate outputs
for (auto& no : _networkOutputs) {
std::string outputID = m_graph->MapOutputName(no.first);
@ -890,8 +892,8 @@ void CLDNNInferRequest::allocate_outputs_dynamic() {
}
}

void CLDNNInferRequest::InferImpl() {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::InferImpl");
void InferRequest::InferImpl() {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::InferImpl");
setup_stream_graph();
std::lock_guard<std::mutex> lk(m_graph->get_mutex());
preprocess();
@ -899,8 +901,8 @@ void CLDNNInferRequest::InferImpl() {
wait();
}

std::map<std::string, InferenceEngineProfileInfo> CLDNNInferRequest::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::GetPerformanceCounts");
std::map<std::string, InferenceEngineProfileInfo> InferRequest::GetPerformanceCounts() const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::GetPerformanceCounts");
if (!m_useProfiling) {
IE_THROW() << "Performance counters were not enabled";
} else {
@ -908,9 +910,9 @@ std::map<std::string, InferenceEngineProfileInfo> CLDNNInferRequest::GetPerforma
}
}

void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob,
void InferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob::Ptr& inputBlob,
std::vector<cldnn::event::ptr>& dependencies) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_input");
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_input");
auto inputLayoutItr = m_graph->GetInputLayouts().find(inputName);
if (inputLayoutItr == m_graph->GetInputLayouts().end()) {
IE_THROW() << "Input name mismatch.";
@ -943,7 +945,7 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob

if (!is_dev_input) {
if (prec == Precision::I16 || prec == Precision::U16) {
// clDNN doesn't support I16 input precision,
// GPU plugin doesn't support I16 input precision,
// so have to convert input data to fp32 precision
cldnn::mem_lock<float> ptr{ inputMem, stream };
if (prec == Precision::I16) {
@ -968,8 +970,8 @@ void CLDNNInferRequest::prepare_input(const cldnn::primitive_id& inputName, Blob
}
}

void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNInferRequest::prepare_output");
void InferRequest::prepare_output(const cldnn::primitive_id& outputName, Blob::Ptr& outputBlob) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "InferRequest::prepare_output");
Blob::Ptr reqBlob = _deviceOutputs.at(outputName);
cldnn::primitive_id internalName = outputsMap[outputName];
auto _nw_ptr = m_graph->GetNetwork();
@ -985,26 +987,28 @@ void CLDNNInferRequest::prepare_output(const cldnn::primitive_id& outputName, Bl
_nw_ptr->set_output_memory(internalName, outputMem);
}

InferenceEngine::Blob::Ptr CLDNNInferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngine::TensorDesc& desc, const cldnn::layout& layout) {
if (m_graph->GetEngine()->use_unified_shared_memory()) {
auto blobPtr = std::make_shared<CLDNNRemoteUSMbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout,
nullptr,
0,
0,
CLDNNRemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
auto blobPtr = std::make_shared<RemoteUSMbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout,
nullptr,
0,
0,
RemoteBlobImpl::BlobType::BT_USM_HOST_INTERNAL);
getBlobImpl(blobPtr.get())->allocate();
return blobPtr;
} else {
auto blobPtr = std::make_shared<CLDNNRemoteCLbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout);
auto blobPtr = std::make_shared<RemoteCLbuffer>(m_graph->GetContext(),
m_graph->GetNetwork()->get_stream(),
desc,
layout);
getBlobImpl(blobPtr.get())->allocate();
return blobPtr;
}
}

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
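For reference, a standalone sketch of the buf_info-style partial copy that copy_output_data and copyResultToOutputBlob above rely on when dynamic batching splits execution across sub-networks: each chunk lands in the shared output at buf_offset. Plain std::vector stands in for cldnn memory and IE blobs, the struct name and sample sizes are made up, and the exact side on which the plugin applies the offset is not visible in the lines shown, so treat this as an illustration only.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    struct buf_info_sketch {   // stands in for the plugin's buf_info (buf_offset / buf_size)
        size_t buf_offset;
        size_t buf_size;
    };

    // Copy either the whole blob or just the chunk described by bi, mirroring the
    // "size_t n = ...; size_t offset = ...;" lines of copyResultToOutputBlob above.
    void copy_chunk(const std::vector<float>& src, std::vector<float>& dst,
                    const buf_info_sketch* bi) {
        size_t n = (bi == nullptr) ? dst.size() : bi->buf_size;
        size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
        std::copy_n(src.begin(), n, dst.begin() + offset);
    }

    int main() {
        std::vector<float> out(4, 0.f);             // full-batch output buffer
        std::vector<float> chunk0 = {1.f, 2.f};     // result of sub-network 0
        std::vector<float> chunk1 = {3.f, 4.f};     // result of sub-network 1
        buf_info_sketch b0{0, 2}, b1{2, 2};
        copy_chunk(chunk0, out, &b0);
        copy_chunk(chunk1, out, &b1);
        for (float v : out) std::cout << v << " ";  // prints "1 2 3 4 "
        std::cout << "\n";
        return 0;
    }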
@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/batch_to_space.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/batch_to_space.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::BatchToSpace>& op) {
p.ValidateInputs(op, {4});
@ -35,7 +37,7 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v
}
inputs.emplace_back(format, sizes, default_size);
}
auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
auto out_size = tensor_from_dims(op->get_output_shape(0));

auto batchToSpacePrim = cldnn::batch_to_space(layerName,
inputPrimitives[0], // input
@ -51,4 +53,6 @@ static void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v

REGISTER_FACTORY_IMPL(v1, BatchToSpace);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::Node>& op, const ngraph::AxisSet axis_mapping) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -69,7 +71,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No
inputShape = tmp_shape;
}

auto targetShape = CldnnTensorFromIEDims(inputShape);
auto targetShape = tensor_from_dims(inputShape);

auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitive, targetShape, op->get_friendly_name());
p.AddPrimitive(reshapePrim);
@ -80,7 +82,7 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::No

auto broadcastPrim = cldnn::broadcast(layerName,
inputPrimitive,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
{},
op->get_friendly_name());

@ -119,4 +121,6 @@ static void CreateBroadcastOp(Program& p, const std::shared_ptr<ngraph::op::v3::
REGISTER_FACTORY_IMPL(v1, Broadcast);
REGISTER_FACTORY_IMPL(v3, Broadcast);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,21 +2,23 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/concat.hpp"

#include "intel_gpu/primitives/concatenation.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) {
unsigned cldnn_axis = axis >= 0 ? axis : axis + static_cast<int32_t>(rank);
if (cldnn_axis >= rank)
IE_THROW() << "Concatenation axis exceeds number of dimensions";

// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
if (cldnn_axis >= 2) {
auto spatial_axis = cldnn_axis - 2;
@ -54,4 +56,6 @@ static void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Con

REGISTER_FACTORY_IMPL(v0, Concat);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
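For reference, a standalone sketch of the axis re-ordering that GetConcatAxis above (and GetCumSumAxis later in this diff) performs: batch and feature stay in place, spatial axes are addressed in reverse. The reversal arithmetic below is an assumption written for illustration; the real functions additionally map the result onto cldnn axis enums, and the spatial count they use for small ranks is outside the lines shown.

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Map an IE axis (possibly negative) of a tensor with the given rank to the
    // GPU plugin ordering: batch, feature, then spatial dimensions in reverse.
    uint32_t map_axis(int32_t axis, size_t rank) {
        uint32_t a = axis >= 0 ? axis : axis + static_cast<int32_t>(rank);
        assert(a < rank && "axis exceeds number of dimensions");
        if (a >= 2) {                                        // spatial dimension
            uint32_t spatial_axis = a - 2;
            uint32_t num_spatial = static_cast<uint32_t>(rank) - 2;
            a = 2 + (num_spatial - 1 - spatial_axis);        // reverse the spatial order
        }
        return a;
    }

    int main() {
        // For an NCHW (rank-4) tensor: N->0, C->1, H->3, W->2 under the reversed-spatial ordering.
        std::cout << map_axis(2, 4) << " " << map_axis(3, 4) << "\n";  // prints "3 2"
        return 0;
    }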
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/constant.hpp"
|
||||
#include "ngraph/op/convolution.hpp"
|
||||
@ -20,7 +20,9 @@
|
||||
#include "intel_gpu/primitives/data.hpp"
|
||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
static cldnn::tensor getConstTensor(const ngraph::Shape constDims) {
|
||||
cldnn::tensor constTensor;
|
||||
@ -216,4 +218,6 @@ void createClDnnConstant(Program& p, const ngraph::Shape& constDims, const std::
|
||||
|
||||
REGISTER_FACTORY_IMPL(v0, Constant);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,15 +2,17 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/convert.hpp"
|
||||
#include "ngraph/op/convert_like.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/reorder.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
static void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvertLike>& op) {
|
||||
p.ValidateInputs(op, {2});
|
||||
@ -52,4 +54,6 @@ static void CreateConvertOp(Program& p, const std::shared_ptr<ngraph::op::v0::Co
|
||||
REGISTER_FACTORY_IMPL(v0, Convert);
|
||||
REGISTER_FACTORY_IMPL(v1, ConvertLike);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/convolution.hpp"
|
||||
#include "ngraph/op/binary_convolution.hpp"
|
||||
@ -19,7 +19,9 @@
|
||||
#include "intel_gpu/primitives/permute.hpp"
|
||||
#include "intel_gpu/primitives/reorder.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
struct ConvoltuionParameters {
|
||||
cldnn::tensor stride;
|
||||
@ -82,7 +84,7 @@ static void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::o
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
tensor_from_dims(outDims),
|
||||
DataTypeFromPrecision(outPrecision),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
@ -111,7 +113,7 @@ static void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
tensor_from_dims(outDims),
|
||||
DataTypeFromPrecision(outPrecision),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
@ -168,7 +170,7 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ng
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
tensor_from_dims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
@ -225,7 +227,7 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_p
|
||||
params.groups,
|
||||
params.stride,
|
||||
params.padding,
|
||||
CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()),
|
||||
tensor_from_dims(op->get_output_tensor(0).get_shape()),
|
||||
weights_have_group_dim,
|
||||
op->get_friendly_name());
|
||||
|
||||
@ -272,7 +274,7 @@ static void DeformableConvolutionImpl(Program& p,
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
tensor_from_dims(outDims),
|
||||
kernel,
|
||||
bilinearInterpolationPad,
|
||||
op->get_friendly_name());
|
||||
@ -283,7 +285,7 @@ static void DeformableConvolutionImpl(Program& p,
|
||||
weights,
|
||||
{},
|
||||
params.groups,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
tensor_from_dims(outDims),
|
||||
op->get_friendly_name());
|
||||
p.AddPrimitive(defConvPrim);
|
||||
p.AddPrimitiveToProfiler(defConvLayerNameConv, op);
|
||||
@ -297,7 +299,7 @@ static void DeformableConvolutionImpl(Program& p,
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
tensor_from_dims(outDims),
|
||||
bilinearInterpolationPad,
|
||||
op->get_friendly_name());
|
||||
|
||||
@ -334,7 +336,7 @@ static void CreateBinaryConvolutionOp(Program& p, const std::shared_ptr<ngraph::
|
||||
params.stride,
|
||||
params.padding,
|
||||
params.dilation,
|
||||
CldnnTensorFromIEDims(outDims),
|
||||
tensor_from_dims(outDims),
|
||||
params.groups,
|
||||
op->get_pad_value(),
|
||||
calc_precision,
|
||||
@ -352,4 +354,6 @@ REGISTER_FACTORY_IMPL(v1, DeformableConvolution);
|
||||
REGISTER_FACTORY_IMPL(v8, DeformableConvolution);
|
||||
REGISTER_FACTORY_IMPL(v1, BinaryConvolution);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/ctc_greedy_decoder.hpp"
|
||||
#include "ngraph/op/ctc_greedy_decoder_seq_len.hpp"
|
||||
@ -15,7 +15,9 @@
|
||||
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::Node>& op, bool ctc_merge_repeated) {
|
||||
p.ValidateInputs(op, {2, 3});
|
||||
@ -27,7 +29,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
|
||||
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
|
||||
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
|
||||
if (inputDataType == cldnn::data_types::i64) {
|
||||
// clDNN primitive supports only i32 data type for 'sequence_length' and 'blank_index' inputs
|
||||
// GPU primitive supports only i32 data type for 'sequence_length' and 'blank_index' inputs
|
||||
// so we need additional reorder if it's provided as i64
|
||||
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
|
||||
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
|
||||
@ -72,7 +74,7 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
|
||||
cldnn::layout mutableLayout = cldnn::layout(
|
||||
DataTypeFromPrecision(mutable_precision),
|
||||
DefaultFormatForDims(op->get_output_shape(1).size()),
|
||||
CldnnTensorFromIEDims(op->get_output_shape(1)));
|
||||
tensor_from_dims(op->get_output_shape(1)));
|
||||
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
@ -95,10 +97,10 @@ static void CreateCommonCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngr
|
||||
reorderedInputs,
|
||||
blank_index,
|
||||
ctc_merge_repeated,
|
||||
CldnnTensorFromIEDims(op->get_output_shape(0)),
|
||||
tensor_from_dims(op->get_output_shape(0)),
|
||||
op->get_friendly_name());
|
||||
|
||||
// clDNN primitive supports only i32 as output data type
|
||||
// GPU primitive supports only i32 as output data type
|
||||
primitive.output_data_type = DataTypeFromPrecision(ngraph::element::i32);
|
||||
|
||||
if (num_output == 2) {
|
||||
@ -131,4 +133,6 @@ static void CreateCTCGreedyDecoderSeqLenOp(Program& p, const std::shared_ptr<ngr
|
||||
REGISTER_FACTORY_IMPL(v0, CTCGreedyDecoder);
|
||||
REGISTER_FACTORY_IMPL(v6, CTCGreedyDecoderSeqLen);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,15 +2,17 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/cum_sum.hpp"
|
||||
#include "ngraph/op/constant.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/cum_sum.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
static inline cldnn::cum_sum::cum_sum_axis GetCumSumAxis(int32_t axis, uint32_t rank) {
|
||||
if (axis < 0)
|
||||
@ -18,7 +20,7 @@ static inline cldnn::cum_sum::cum_sum_axis GetCumSumAxis(int32_t axis, uint32_t
|
||||
if (axis < 0 || axis >= rank)
|
||||
IE_THROW() << "CumSum axis is not correspond to number of dimensions";
|
||||
|
||||
// Difference in dimension ordering between IE and clDNN,
|
||||
// Difference in dimension ordering between IE and GPU plugin,
|
||||
// reverse spatial dimensions after batch and feature.
|
||||
uint32_t cldnn_axis = axis;
|
||||
if (axis >= 2) {
|
||||
@ -72,4 +74,6 @@ static void CreateCumSumOp(Program& p, const std::shared_ptr<ngraph::op::v0::Cum
|
||||
|
||||
REGISTER_FACTORY_IMPL(v0, CumSum);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,9 +2,9 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "simple_math.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "intel_gpu/plugin/simple_math.hpp"
|
||||
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
#include "ngraph/node.hpp"
|
||||
@ -12,7 +12,9 @@
|
||||
#include "intel_gpu/primitives/custom_gpu_primitive.hpp"
|
||||
#include "intel_gpu/primitives/reorder.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
template<typename T>
|
||||
static inline std::string vecToString(std::vector<T> vec) {
|
||||
@ -100,7 +102,7 @@ protected:
|
||||
std::map<std::string, std::string> m_values;
|
||||
};
|
||||
|
||||
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCustomLayerPtr customLayer) {
|
||||
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CustomLayerPtr customLayer) {
|
||||
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
|
||||
std::string layerName = layer_type_name_ID(op);
|
||||
|
||||
@ -130,7 +132,7 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
cldnn::format outputFormat(cldnn::format::any);
|
||||
for (const auto& param : customLayer->KernelParams()) {
|
||||
switch (param.type) {
|
||||
case CLDNNCustomLayer::ParamType::Input: {
|
||||
case CustomLayer::ParamType::Input: {
|
||||
kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
|
||||
kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input;
|
||||
kernelParameters[param.paramIndex].index =
|
||||
@ -159,7 +161,7 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
}
|
||||
break;
|
||||
}
|
||||
case CLDNNCustomLayer::ParamType::Output: {
|
||||
case CustomLayer::ParamType::Output: {
|
||||
kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1));
|
||||
kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_output;
|
||||
kernelParameters[param.paramIndex].index =
|
||||
@ -255,4 +257,6 @@ void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCu
|
||||
p.primitiveIDs[genericLayerName] = prevLayerName;
|
||||
}
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,14 +2,16 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/depth_to_space.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/depth_to_space.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
static cldnn::depth_to_space_mode GetDepthMode(ngraph::op::v0::DepthToSpace::DepthToSpaceMode mode) {
|
||||
switch (mode) {
|
||||
@ -42,4 +44,6 @@ static void CreateDepthToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v
|
||||
|
||||
REGISTER_FACTORY_IMPL(v0, DepthToSpace);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,14 +2,16 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
|
||||
#include "ngraph/op/detection_output.hpp"
|
||||
|
||||
#include "intel_gpu/primitives/detection_output.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str) {
|
||||
static const std::map<std::string, cldnn::prior_box_code_type> CodeNameToType = {
|
||||
@ -84,4 +86,6 @@ static void CreateDetectionOutputOp(Program& p, const std::shared_ptr<ngraph::op
|
||||
|
||||
REGISTER_FACTORY_IMPL(v0, DetectionOutput);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cldnn_program.h"
|
||||
#include "cldnn_common_utils.h"
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
#include "ngraph/op/add.hpp"
|
||||
@ -30,7 +30,9 @@
|
||||
#include "intel_gpu/primitives/reorder.hpp"
|
||||
#include "intel_gpu/primitives/reshape.hpp"
|
||||
|
||||
namespace CLDNNPlugin {
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::eltwise_mode mode) {
|
||||
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
|
||||
@ -65,7 +67,7 @@ void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cl
|
||||
// Extend input dimensions by prepending ones
|
||||
inputShape.insert(inputShape.begin(), outRank - inputRank, 1ul);
|
||||
|
||||
auto targetShape = CldnnTensorFromIEDims(inputShape);
|
||||
auto targetShape = tensor_from_dims(inputShape);
|
||||
|
||||
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());
|
||||
p.AddPrimitive(reshapePrim);
|
||||
@ -194,4 +196,6 @@ REGISTER_FACTORY_IMPL(v1, Power);
|
||||
REGISTER_FACTORY_IMPL(v1, FloorMod);
|
||||
REGISTER_FACTORY_IMPL(v1, Mod);
|
||||
|
||||
} // namespace CLDNNPlugin
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/embedding_segments_sum.hpp"
#include "ngraph/op/embeddingbag_offsets_sum.hpp"
@ -14,7 +14,9 @@

#include "transformations/utils/utils.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingBagOffsetsSum>& op) {
p.ValidateInputs(op, {3, 4, 5});
@ -42,7 +44,7 @@ static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngr
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices inputs,
// GPU primitive supports only i32 data type for indices inputs,
// so we need additional reorders if they are provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -64,7 +66,7 @@ static void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngr
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::offsets_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
defaultIndex,
op->get_friendly_name());

@ -83,7 +85,7 @@ static void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngra
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if ((portIndex == 1) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices input,
// GPU primitive supports only i32 data type for indices input,
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -105,7 +107,7 @@ static void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngra
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::packed_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
-1,
op->get_friendly_name());

@ -142,7 +144,7 @@ static void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngrap
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices inputs,
// GPU primitive supports only i32 data type for indices inputs,
// so we need additional reorders if they are provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -164,7 +166,7 @@ static void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngrap
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::segments_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
defaultIndex,
op->get_friendly_name());

@ -176,4 +178,6 @@ REGISTER_FACTORY_IMPL(v3, EmbeddingBagOffsetsSum);
REGISTER_FACTORY_IMPL(v3, EmbeddingBagPackedSum);
REGISTER_FACTORY_IMPL(v3, EmbeddingSegmentsSum);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/experimental_detectron_roi_feature.hpp"

#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/primitives/experimental_detectron_roi_feature_extractor.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const std::shared_ptr<ngraph::op::v6::ExperimentalDetectronROIFeatureExtractor>& op) {
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -19,7 +21,7 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s
cldnn::layout mutableLayout = cldnn::layout(
DataTypeFromPrecision(op->get_output_element_type(1)),
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
tensor_from_dims(op->get_output_shape(1)));

cldnn::memory::ptr shared_memory {p.GetEngine().allocate_memory(mutableLayout)};

@ -54,4 +56,6 @@ static void CreateExperimentalDetectronROIFeatureExtractorOp(Program& p, const s

REGISTER_FACTORY_IMPL(v6, ExperimentalDetectronROIFeatureExtractor);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/extractimagepatches.hpp"

#include "intel_gpu/primitives/extract_image_patches.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static inline std::string PadToString(ngraph::op::PadType pad) {
switch (pad) {
@ -38,7 +40,7 @@ static void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph
strides,
rates,
auto_pad,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
op->get_friendly_name());

p.AddPrimitive(extractImagePatchesPrim);
@ -47,4 +49,6 @@ static void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph

REGISTER_FACTORY_IMPL(v3, ExtractImagePatches);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/fake_quantize.hpp"

#include "intel_gpu/primitives/quantize.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v0::FakeQuantize>& op) {
p.ValidateInputs(op, {5});
@ -40,4 +42,6 @@ static void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v

REGISTER_FACTORY_IMPL(v0, FakeQuantize);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/gather_tree.hpp"

#include "intel_gpu/primitives/gather_tree.hpp"
#include "intel_gpu/primitives/reorder.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::GatherTree>& op) {
p.ValidateInputs(op, {4});
@ -23,7 +25,7 @@ static void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1:
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive does not support i64 inputs,
// GPU primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -55,4 +57,6 @@ static void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1:

REGISTER_FACTORY_IMPL(v1, GatherTree);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/gather.hpp"

#include "intel_gpu/primitives/gather.hpp"
#include "intel_gpu/primitives/reorder.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::gather::gather_axis GetGatherAxis(int32_t axis, cldnn::format inputFormat) {
if (axis == 0) {
@ -70,7 +72,7 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (inputDataType == cldnn::data_types::i64) {
// clDNN primitive does not support i64 inputs,
// GPU primitive does not support i64 inputs,
// so we need additional reorders to convert them to i32
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -95,7 +97,7 @@ void CreateGatherOpBase(Program& p, const std::shared_ptr<T>& op, const int64_t
reorderedInputs[1],
GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size())),
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
batch_dim,
support_neg_ind,
op->get_friendly_name());
@ -125,4 +127,6 @@ static void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v8::Gat

REGISTER_FACTORY_IMPL(v8, Gather);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/gather_elements.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/gather_elements.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsigned rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsi
if (axis < 0 || axis >= rank)
IE_THROW() << "GatherElements axis is not correspond to number of dimensions";

// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
unsigned cldnn_axis = axis;
if (axis >= 2) {
@ -54,7 +56,7 @@ static void CreateGatherElementsOp(Program& p, const std::shared_ptr<ngraph::op:
inputPrimitives[0],
inputPrimitives[1],
outLayout,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
GetGatherAxis(axis, rank),
op->get_friendly_name());

@ -64,4 +66,6 @@ static void CreateGatherElementsOp(Program& p, const std::shared_ptr<ngraph::op:

REGISTER_FACTORY_IMPL(v6, GatherElements);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/gather_nd.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/gather_nd.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v5::GatherND>& op) {
p.ValidateInputs(op, {2});
@ -62,4 +64,6 @@ static void CreateGatherNDOp(Program& p, const std::shared_ptr<ngraph::op::v8::G

REGISTER_FACTORY_IMPL(v8, GatherND);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/grn.hpp"

#include "intel_gpu/primitives/grn.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>& op) {
p.ValidateInputs(op, {1});
@ -28,4 +30,6 @@ static void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>&

REGISTER_FACTORY_IMPL(v0, GRN);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "caseless.hpp"

#include "ngraph/op/interpolate.hpp"
@ -11,7 +11,9 @@

#include "intel_gpu/primitives/resample.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
switch (mode) {
@ -71,7 +73,7 @@ static cldnn::resample::resample_axis GetInterpolationAxis(int32_t axis, uint32_
if (axis < 0 || axis >= sz)
IE_THROW() << "Interpolate axis is not correspond to number of dimensions";

// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
uint32_t cldnn_axis = axis;
if (axis >= 2) {
@ -111,7 +113,7 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4
auto attrs = op->get_attrs();
auto inputRank = op->get_input_shape(0).size();
auto outDims = op->get_output_shape(0).size();
auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
auto outTensor = tensor_from_dims(op->get_output_shape(0));

std::vector<int> pad_begin(attrs.pads_begin.begin(), attrs.pads_begin.end());
std::vector<int> pad_end(attrs.pads_end.begin(), attrs.pads_end.end());
@ -202,4 +204,6 @@ static void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4

REGISTER_FACTORY_IMPL(v4, Interpolate);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -3,9 +3,9 @@
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "cldnn_engine.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/plugin.hpp"

#include <cpp/ie_cnn_network.h>

@ -26,7 +26,9 @@

using Loop = ngraph::op::v5::Loop;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
@ -41,7 +43,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
const int32_t output_idx) {
const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx));
const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
const auto tensor = tensor_from_dims(op->get_output_shape(output_idx));
cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
auto mem = p.GetEngine().allocate_memory(output_layout);
auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
@ -224,4 +226,6 @@ static void CreateLoopOp(Program& p, const std::shared_ptr<Loop>& op) {

REGISTER_FACTORY_IMPL(v5, Loop);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/lrn.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/lrn.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::lrn_norm_region GetNormRegion(std::vector<int64_t> axis_value) {
if (axis_value.size() == 1 && axis_value[0] == 1) {
@ -47,4 +49,6 @@ static void CreateLRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::LRN>&

REGISTER_FACTORY_IMPL(v0, LRN);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/matmul.hpp"
#include "ngraph/op/constant.hpp"
@ -15,7 +15,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/permute.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

/*
* get_aligned_shapes function align two input shapes to have the same size and
@ -128,7 +130,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
auto reshapeInName = op->get_friendly_name() + suffix;
auto reshapeInPrim = cldnn::reshape(reshapeInName,
inputName,
CldnnTensorFromIEDims(reshapeSize),
tensor_from_dims(reshapeSize),
op->get_friendly_name());
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
@ -157,7 +159,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat

auto lastLayerName = layerName;
if (reshape_fc) {
auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
auto outputShape = tensor_from_dims(op->get_output_shape(0));
auto outReshapeName = layerName + "_cldnn_out_reshape";
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());

@ -269,7 +271,7 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat

// Reshape output if gemm specific shape does not match default one
if (outDimsN < 4) {
auto outputShape = CldnnTensorFromIEDims(outDims);
auto outputShape = tensor_from_dims(outDims);
auto outReshapeName = layerName + "_cldnn_out_reshape";
auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape, op->get_friendly_name());

@ -285,4 +287,6 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat

REGISTER_FACTORY_IMPL(v0, MatMul);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/mvn.hpp"
#include "ngraph/op/constant.hpp"
@ -12,7 +12,9 @@

#include <algorithm>

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateCommonMVNOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
bool across_channels, bool normalize_variance, float eps, bool eps_inside_sqrt = true) {
@ -65,4 +67,6 @@ static void CreateMVNOp(Program& p, const std::shared_ptr<ngraph::op::v6::MVN>&
REGISTER_FACTORY_IMPL(v0, MVN);
REGISTER_FACTORY_IMPL(v6, MVN);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/non_max_suppression.hpp"
#include <ngraph/opsets/opset3.hpp>
@ -14,7 +14,9 @@
#include "intel_gpu/primitives/non_max_suppression.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static bool GetCenterPointBox(ngraph::op::v5::NonMaxSuppression::BoxEncodingType encoding) {
switch (encoding) {
@ -35,7 +37,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if ((portIndex == 2) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for 'max_output_boxes_per_class' input
// GPU primitive supports only i32 data type for 'max_output_boxes_per_class' input
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
@ -54,7 +56,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
}
}

// clDNN primitive supports only i32 as output data type
// GPU primitive supports only i32 as output data type
auto out_type = op->get_output_element_type(0);
if (out_type == ngraph::element::i64) {
out_type = ngraph::element::i32;
@ -77,7 +79,7 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt
cldnn::layout mutableLayoutSecond = cldnn::layout(
DataTypeFromPrecision(mutable_precision_second),
DefaultFormatForDims(op->get_output_shape(2).size()),
CldnnTensorFromIEDims(op->get_output_shape(2)));
tensor_from_dims(op->get_output_shape(2)));

GPU_DEBUG_IF(debug_config->verbose >= 2) {
GPU_DEBUG_COUT << "[" << layer_type_name_ID(op) << ": mutable data]" << std::endl;
@ -175,4 +177,6 @@ static void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_pt

REGISTER_FACTORY_IMPL(internal, NonMaxSuppressionIEInternal);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/normalize_l2.hpp"
#include "ngraph/op/constant.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/normalize.hpp"
#include "intel_gpu/primitives/data.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::NormalizeL2>& op) {
p.ValidateInputs(op, {2});
@ -61,4 +63,6 @@ static void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0

REGISTER_FACTORY_IMPL(v0, NormalizeL2);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "transformations/utils/utils.hpp"

#include "ngraph/op/one_hot.hpp"

#include "intel_gpu/primitives/one_hot.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& op) {
p.ValidateInputs(op, {4});
@ -49,7 +51,7 @@ static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::One

auto oneHotPrim = cldnn::one_hot(layerName,
inputPrimitives[0],
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
DataTypeFromPrecision(op->get_output_element_type(0)),
static_cast<uint16_t>(axis),
on_value,
@ -62,4 +64,6 @@ static void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::One

REGISTER_FACTORY_IMPL(v1, OneHot);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "transformations/utils/utils.hpp"

#include "ngraph/op/pad.hpp"

#include "intel_gpu/primitives/border.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::border_type GetBorderType(ngraph::op::PadMode mode) {
switch (mode) {
@ -73,4 +75,6 @@ static void CreatePadOp(Program& p, const std::shared_ptr<ngraph::op::v1::Pad>&

REGISTER_FACTORY_IMPL(v1, Pad);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/parameter.hpp"

@ -14,7 +14,9 @@

using namespace InferenceEngine;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Parameter>& op) {
auto networkInputs = p.GetNetworkInputs();
@ -26,11 +28,11 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
// first create and add the input layout
const auto inputDesc = inputInfo->getTensorDesc();
const auto inputDims = inputDesc.getDims();
Layout l = inputDesc.getLayout();
Precision ip = inputDesc.getPrecision();
InferenceEngine::Layout l = inputDesc.getLayout();
InferenceEngine::Precision ip = inputDesc.getPrecision();

cldnn::format inputFormat = cldnn::format::bfyx;
if (Layout::BLOCKED == l && 6 == inputDims.size()) {
if (InferenceEngine::Layout::BLOCKED == l && 6 == inputDims.size()) {
inputFormat = cldnn::format::bfwzyx;
} else {
inputFormat = FormatFromLayout(l);
@ -46,7 +48,7 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
cldnn::spatial(inputDims[5], inputDims[4], inputDims[3], inputDims[2]));
break;
case 5:
if (Layout::NCDHW == l) {
if (InferenceEngine::Layout::NCDHW == l) {
dataTensor = cldnn::tensor(cldnn::batch(batch),
cldnn::feature(inputDims[1]),
cldnn::spatial(inputDims[4], inputDims[3], inputDims[2]));
@ -55,10 +57,10 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
}
break;
case 4:
if (Layout::NCHW == l || Layout::CHW == l) {
if (InferenceEngine::Layout::NCHW == l || InferenceEngine::Layout::CHW == l) {
dataTensor = cldnn::tensor(batch,
TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2]));
} else if (Layout::NHWC == l) {
} else if (InferenceEngine::Layout::NHWC == l) {
dataTensor = cldnn::tensor(batch,
TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2]));
} else {
@ -66,14 +68,14 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
}
break;
case 3:
if (Layout::CHW == l) {
if (InferenceEngine::Layout::CHW == l) {
dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[2]));
} else {
IE_THROW() << "Unsupported layout (" << l << ") in 3D input " + inputInfo->name();
}
break;
case 2:
if (Layout::NCHW == l || NC == l) {
if (InferenceEngine::Layout::NCHW == l || NC == l) {
dataTensor = cldnn::tensor(batch, TensorValue(inputDims[1]), 1, 1);
} else {
IE_THROW() << "Unsupported layout (" << l << ") in 2D input " << inputInfo->name();
@ -177,8 +179,8 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::
if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
// for NV12, create two input layouts with reorder instead of one,
// and then would expect compound blob in inferRequest
if (Layout::NCHW != l &&
(Precision::I8 != ip || Precision::U8 != ip)) {
if (InferenceEngine::Layout::NCHW != l &&
(InferenceEngine::Precision::I8 != ip || InferenceEngine::Precision::U8 != ip)) {
IE_THROW() << "Unsupported layout (" << l << ") or precision "
<< ip.name() << ") for NV12 input " + inputInfo->name();
}
@ -280,4 +282,6 @@ static void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::

REGISTER_FACTORY_IMPL(v0, Parameter);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/avg_pool.hpp"

#include "intel_gpu/primitives/pooling.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

struct PoolingParameters {
cldnn::tensor kernel;
@ -69,7 +71,7 @@ static void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Av
params.kernel,
params.stride,
params.pad_begin,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
DataTypeFromPrecision(op->get_output_element_type(0)),
op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
@ -89,7 +91,7 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
params.kernel,
params.stride,
params.pad_begin,
CldnnTensorFromIEDims(op->get_output_shape(0)),
tensor_from_dims(op->get_output_shape(0)),
DataTypeFromPrecision(op->get_output_element_type(0)),
op->get_friendly_name());
poolPrim.pad_end = params.pad_end;
@ -100,4 +102,6 @@ static void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::Ma
REGISTER_FACTORY_IMPL(v1, MaxPool);
REGISTER_FACTORY_IMPL(v1, AvgPool);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/prior_box.hpp"
#include "ngraph/op/prior_box_clustered.hpp"

#include "intel_gpu/primitives/prior_box.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBoxClustered>& op) {
p.ValidateInputs(op, {2});
@ -114,4 +116,6 @@ static void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::P
REGISTER_FACTORY_IMPL(v0, PriorBoxClustered);
REGISTER_FACTORY_IMPL(v0, PriorBox);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/proposal.hpp"

@ -11,7 +11,9 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
p.ValidateInputs(op, {3});
@ -61,7 +63,7 @@ static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::P

cldnn::layout mutableLayout = cldnn::layout(DataTypeFromPrecision(mutable_precision),
DefaultFormatForDims(op->get_output_shape(1).size()),
CldnnTensorFromIEDims(op->get_output_shape(1)));
tensor_from_dims(op->get_output_shape(1)));

GPU_DEBUG_GET_INSTANCE(debug_config);
GPU_DEBUG_IF(debug_config->verbose >= 2) {
@ -153,4 +155,6 @@ static void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::P
REGISTER_FACTORY_IMPL(v0, Proposal);
REGISTER_FACTORY_IMPL(v4, Proposal);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/reduce_sum.hpp"
#include "ngraph/op/reduce_prod.hpp"
@ -20,7 +20,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
p.ValidateInputs(op, {2});
@ -173,4 +175,6 @@ REGISTER_FACTORY_IMPL(v1, ReduceSum);
REGISTER_FACTORY_IMPL(v4, ReduceL1);
REGISTER_FACTORY_IMPL(v4, ReduceL2);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/region_yolo.hpp"

#include "intel_gpu/primitives/region_yolo.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::RegionYolo>& op) {
p.ValidateInputs(op, {1});
@ -37,4 +39,6 @@ static void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0:

REGISTER_FACTORY_IMPL(v0, RegionYolo);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/reorg_yolo.hpp"

#include "intel_gpu/primitives/reorg_yolo.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYolo>& op) {
p.ValidateInputs(op, {1});
@ -29,4 +31,6 @@ static void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::

REGISTER_FACTORY_IMPL(v0, ReorgYolo);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/reshape.hpp"
#include "ngraph/op/squeeze.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/reshape.hpp"
#include "intel_gpu/primitives/reorder.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
p.ValidateInputs(op, {1, 2});
@ -21,7 +23,7 @@ static void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node

auto inDims = op->get_input_shape(0);
auto outDims = op->get_output_shape(0);
auto outTensor = CldnnTensorFromIEDims(outDims);
auto outTensor = tensor_from_dims(outDims);

// if we convert from or to 5D/6D, additional reorder also required to change format
cldnn::primitive_id reshapeInputId = inputPrimitives[0];
@ -74,4 +76,6 @@ REGISTER_FACTORY_IMPL(v1, Reshape);
REGISTER_FACTORY_IMPL(v0, Squeeze);
REGISTER_FACTORY_IMPL(v0, Unsqueeze);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/result.hpp"

@ -11,7 +11,9 @@

using namespace InferenceEngine;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& op) {
OutputsDataMap networkOutputs = p.GetNetworkOutputs();
@ -73,4 +75,6 @@ static void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Res

REGISTER_FACTORY_IMPL(v0, Result);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/reverse_sequence.hpp"

#include "intel_gpu/primitives/reverse_sequence.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReverseSequence>& op) {
p.ValidateInputs(op, {2});
@ -31,4 +33,6 @@ static void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op

REGISTER_FACTORY_IMPL(v0, ReverseSequence);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/lstm_cell.hpp"
#include "ngraph/op/lstm_sequence.hpp"
@ -15,7 +15,9 @@
#include "intel_gpu/primitives/crop.hpp"
#include "intel_gpu/primitives/concatenation.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {
static cldnn::activation_func GetActivationFunc(std::string name) {
static const std::map<std::string, cldnn::activation_func> name_mapping = {
{"sigmoid", cldnn::activation_func::logistic},
@ -276,7 +278,7 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v

std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize), op->get_friendly_name());
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, tensor_from_dims(WRreshapeSize), op->get_friendly_name());
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);

@ -353,4 +355,6 @@ static void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v
REGISTER_FACTORY_IMPL(v4, LSTMCell);
REGISTER_FACTORY_IMPL(v5, LSTMSequence);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -1,13 +1,15 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/roi_align.hpp"
#include "intel_gpu/primitives/roi_align.hpp"
#include <memory>

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

namespace {

@ -39,4 +41,6 @@ void CreateROIAlignOp(Program& p, const std::shared_ptr<ngraph::op::v3::ROIAlign

REGISTER_FACTORY_IMPL(v3, ROIAlign);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/roi_pooling.hpp"
#include "ngraph/op/psroi_pooling.hpp"
@ -11,7 +11,9 @@

#include "intel_gpu/primitives/roi_pooling.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::pooling_mode GetPoolingMode(std::string method) {
if (method == "bilinear")
@ -126,4 +128,6 @@ REGISTER_FACTORY_IMPL(v1, DeformablePSROIPooling);
REGISTER_FACTORY_IMPL(v0, PSROIPooling);
REGISTER_FACTORY_IMPL(v0, ROIPooling);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/scatter_elements_update.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/scatter_elements_update.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static inline cldnn::scatter_elements_update::scatter_elements_update_axis GetScatterElementsUpdateAxis(int axis, unsigned rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static inline cldnn::scatter_elements_update::scatter_elements_update_axis GetSc
if (axis < 0 || axis >= rank)
IE_THROW() << "ScatterElementsUpdate axis is not correspond to number of dimensions";

// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
unsigned cldnn_axis = axis;
if (axis >= 2) {
@ -66,4 +68,6 @@ static void CreateScatterElementsUpdateOp(Program& p, const std::shared_ptr<ngra

REGISTER_FACTORY_IMPL(v3, ScatterElementsUpdate);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/scatter_nd_update.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/scatter_nd_update.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::ScatterNDUpdate>& op) {
p.ValidateInputs(op, {3});
@ -31,4 +33,6 @@ static void CreateScatterNDUpdateOp(Program& p, const std::shared_ptr<ngraph::op

REGISTER_FACTORY_IMPL(v3, ScatterNDUpdate);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/scatter_update.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/scatter_update.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static inline cldnn::scatter_update::scatter_update_axis GetScatterUpdateAxis(int axis, unsigned rank) {
if (axis < 0)
@ -18,7 +20,7 @@ static inline cldnn::scatter_update::scatter_update_axis GetScatterUpdateAxis(in
if (axis < 0 || axis >= rank)
IE_THROW() << "ScatterUpdate axis is not correspond to number of dimensions";

// Difference in dimension ordering between IE and clDNN,
// Difference in dimension ordering between IE and GPU plugin,
// reverse spatial dimensions after batch and feature.
unsigned cldnn_axis = axis;
if (axis >= 2) {
@ -66,4 +68,6 @@ static void CreateScatterUpdateOp(Program& p, const std::shared_ptr<ngraph::op::

REGISTER_FACTORY_IMPL(v3, ScatterUpdate);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/select.hpp"

@ -11,7 +11,9 @@
#include "intel_gpu/primitives/reorder.hpp"
#include "intel_gpu/primitives/reshape.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& op) {
p.ValidateInputs(op, {3});
@ -61,7 +63,7 @@ static void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Sel
// Extend input dimensions to the same size as output dimensions by prepending ones
inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul);

auto targetShape = CldnnTensorFromIEDims(inputDims);
auto targetShape = tensor_from_dims(inputDims);

auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape, op->get_friendly_name());

@ -89,4 +91,6 @@ static void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Sel

REGISTER_FACTORY_IMPL(v1, Select);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/shuffle_channels.hpp"

#include "intel_gpu/primitives/shuffle_channels.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op::v0::ShuffleChannels>& op) {
p.ValidateInputs(op, {1, 2});
@ -45,4 +47,6 @@ static void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op

REGISTER_FACTORY_IMPL(v0, ShuffleChannels);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/softmax.hpp"
#include "ngraph/op/log_softmax.hpp"
@ -11,7 +11,9 @@
#include "intel_gpu/primitives/softmax.hpp"
#include "intel_gpu/primitives/activation.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::softmax::dimension_t GetSoftmaxAxis(int64_t axis, size_t rank) {
switch (axis) {
@ -73,4 +75,6 @@ static void CreateLogSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v5:
REGISTER_FACTORY_IMPL(v1, Softmax);
REGISTER_FACTORY_IMPL(v5, LogSoftmax);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/space_to_batch.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/space_to_batch.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v1::SpaceToBatch>& op) {
p.ValidateInputs(op, {4});
@ -35,7 +37,7 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v
}
inputs.emplace_back(format, sizes, default_size);
}
auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
auto out_size = tensor_from_dims(op->get_output_shape(0));

auto batchToSpacePrim = cldnn::space_to_batch(layerName,
inputPrimitives[0], // input
@ -51,4 +53,6 @@ static void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v

REGISTER_FACTORY_IMPL(v1, SpaceToBatch);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/space_to_depth.hpp"

#include "intel_gpu/primitives/space_to_depth.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::space_to_depth::depth_mode GetDepthMode(ngraph::op::v0::SpaceToDepth::SpaceToDepthMode mode) {
switch (mode) {
@ -36,4 +38,6 @@ static void CreateSpaceToDepthOp(Program& p, const std::shared_ptr<ngraph::op::v

REGISTER_FACTORY_IMPL(v0, SpaceToDepth);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/split.hpp"
#include "ngraph/op/variadic_split.hpp"

#include "intel_gpu/primitives/crop.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
@ -37,8 +39,8 @@ static void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>&
        }
        NGRAPH_SUPPRESS_DEPRECATED_END

        auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
        auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);
        auto outTensor = tensor_from_dims(outLayerDims, 1);
        auto offsetTensor = tensor_from_dims(startOffset, 0);

        auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor, op->get_friendly_name());
        p.primitiveIDs[outLayerName] = outLayerName;
@ -71,4 +73,6 @@ static void CreateVariadicSplitOp(Program& p, const std::shared_ptr<ngraph::op::
REGISTER_FACTORY_IMPL(v1, Split);
REGISTER_FACTORY_IMPL(v1, VariadicSplit);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
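The hunks above swap the old CldnnTensorFromIEDims helper for the renamed tensor_from_dims, called either with a single dims argument or with an explicit default value (1 for sizes, 0 for offsets). The snippet below is a standalone, simplified illustration of that idea — padding a possibly lower-rank shape out to a fixed batch/feature/spatial layout — and is not the plugin's actual helper; the function name and exact padding rules here are assumptions for illustration only.

// Illustrative sketch only (not the plugin's tensor_from_dims): pad an N-D shape
// to a fixed-size {batch, feature, y, x} vector, filling missing dims with `def`.
#include <cstdint>
#include <iostream>
#include <vector>

static std::vector<int64_t> pad_dims_to_rank4(const std::vector<int64_t>& dims, int64_t def) {
    std::vector<int64_t> out(4, def);            // e.g. def = 1 for sizes, 0 for offsets
    for (size_t i = 0; i < dims.size() && i < 4; ++i)
        out[i] = dims[i];
    return out;
}

int main() {
    for (auto d : pad_dims_to_rank4({8, 16}, 1))
        std::cout << d << ' ';                   // prints: 8 16 1 1
    std::cout << '\n';
}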
@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/strided_slice.hpp"
#include "ngraph/op/constant.hpp"
@ -12,7 +12,9 @@
#include "intel_gpu/primitives/reshape.hpp"
#include "intel_gpu/primitives/crop.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::StridedSlice>& op) {
    p.ValidateInputs(op, {4});
@ -187,7 +189,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
        auto inPrimitive = inputPrimitives[0];
        // Reshape in case of new axis
        if (!new_axis_mask.empty()) {
            auto targetShape = CldnnTensorFromIEDims(reshape_pattern);
            auto targetShape = tensor_from_dims(reshape_pattern);
            auto reshapeInName = op->get_friendly_name() + "/Reshape_before";
            auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape, op->get_friendly_name());
            p.AddPrimitive(reshapePrim);
@ -211,8 +213,8 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
        }

        cldnn::tensor refSize = CldnnTensorFromIEDims(crop_shape);
        cldnn::tensor offSize = CldnnTensorFromIEDims(offset, 0);
        cldnn::tensor refSize = tensor_from_dims(crop_shape);
        cldnn::tensor offSize = tensor_from_dims(offset, 0);

        auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize, op->get_friendly_name());
@ -221,7 +223,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v

        // Reshape in case of deleting of axis
        if (!shrink_axis_mask.empty()) {
            auto targetShape = CldnnTensorFromIEDims(output_shape);
            auto targetShape = tensor_from_dims(output_shape);
            auto reshapeOutName = op->get_friendly_name() + "/Crop";
            auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape, op->get_friendly_name());
            p.AddPrimitive(reshapePrim);
@ -249,7 +251,7 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v
            e = 1 - e;
        }

    auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));
    auto out_size = tensor_from_dims(op->get_output_shape(0));

    auto stridedSlicePrim = cldnn::strided_slice(layerName,
                                                 inputPrimitives[0],
@ -269,4 +271,6 @@ static void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v

REGISTER_FACTORY_IMPL(v1, StridedSlice);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,10 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
//

///////////////////////////////////////////////////////////////////////////////////////////////////
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "cldnn_engine.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/plugin.hpp"

#include <cpp/ie_cnn_network.h>

@ -24,7 +23,9 @@

using TensorIterator = ngraph::op::v0::TensorIterator;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

template<class DATA_TYPE>
static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num, const cldnn::primitive_id& ext_prim_id) {
@ -39,7 +40,7 @@ static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::sha
                                                      const int32_t output_idx) {
    const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx));
    const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size());
    const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx));
    const auto tensor = tensor_from_dims(op->get_output_shape(output_idx));
    cldnn::layout output_layout = cldnn::layout(precision, format, tensor);
    auto mem = p.GetEngine().allocate_memory(output_layout);
    auto md = cldnn::mutable_data(id, {input}, mem, op->get_friendly_name()); // cldnn::data cannot set dependency
@ -199,4 +200,6 @@ static void CreateTensorIteratorOp(Program &p, const std::shared_ptr<TensorItera

REGISTER_FACTORY_IMPL(v0, TensorIterator);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,14 +2,16 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/tile.hpp"

#include "intel_gpu/primitives/tile.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>& op) {
    p.ValidateInputs(op, {2});
@ -18,7 +20,7 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>

    auto tilePrim = cldnn::tile(layerName,
                                inputPrimitives[0],
                                CldnnTensorFromIEDims(op->get_output_shape(0)),
                                tensor_from_dims(op->get_output_shape(0)),
                                op->get_friendly_name());

    p.AddPrimitive(tilePrim);
@ -27,4 +29,6 @@ static void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>

REGISTER_FACTORY_IMPL(v0, Tile);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,8 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/topk.hpp"

@ -11,7 +11,9 @@
#include "intel_gpu/primitives/mutable_data.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static cldnn::arg_max_min::axis_name GetAxis(int32_t axis, size_t in_rank) {
    if (in_rank == 5) {
@ -70,7 +72,7 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>

        cldnn::layout mutableLayout = cldnn::layout(DataTypeFromPrecision(mutable_precision),
                                                    DefaultFormatForDims(op->get_output_shape(1).size()),
                                                    CldnnTensorFromIEDims(op->get_output_shape(1)));
                                                    tensor_from_dims(op->get_output_shape(1)));

        GPU_DEBUG_GET_INSTANCE(debug_config);
        GPU_DEBUG_IF(debug_config->verbose >= 2) {
@ -130,4 +132,6 @@ static void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>

REGISTER_FACTORY_IMPL(v1, TopK);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,15 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"

#include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp"

#include "intel_gpu/primitives/permute.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpose>& op) {
    p.ValidateInputs(op, {1, 2});
@ -46,4 +48,6 @@ static void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::

REGISTER_FACTORY_IMPL(v1, Transpose);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "intel_gpu/plugin/program.hpp"
#include "transformations/utils/utils.hpp"

#include "ngraph/op/tanh.hpp"
@ -43,7 +43,9 @@

#include "intel_gpu/primitives/activation.hpp"

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
                          cldnn::activation_func func, cldnn::activation_additional_params params) {
@ -313,4 +315,6 @@ REGISTER_FACTORY_IMPL(v0, Sign);
REGISTER_FACTORY_IMPL(v5, HSigmoid);
REGISTER_FACTORY_IMPL(v5, Round);

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
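The REGISTER_FACTORY_IMPL lines that close each op file above pair up with the FACTORY_DECLARATION and FACTORY_CALL macros visible in the plugin source diff that follows: each op translation unit defines a __register_<op>_<version>() function, and Plugin::RegisterPrimitives() declares and calls the same symbols by pasting tokens. A minimal, self-contained sketch of that pattern follows; the macro bodies mirror the diff, but the function body is illustrative, not the plugin's actual definition.

// Simplified sketch of the factory-registration pattern; names mirror the diff
// but the registered function body here is illustrative only.
#include <iostream>

#define FACTORY_DECLARATION(op_version, op_name) \
    void __register ## _ ## op_name ## _ ## op_version();

#define FACTORY_CALL(op_version, op_name) \
    __register ## _ ## op_name ## _ ## op_version();

// What REGISTER_FACTORY_IMPL(v0, Tile) would ultimately provide in its own .cpp:
void __register_Tile_v0() { std::cout << "Tile/v0 factory registered\n"; }

FACTORY_DECLARATION(v0, Tile)   // expands to: void __register_Tile_v0();

int main() {
    FACTORY_CALL(v0, Tile)      // expands to: __register_Tile_v0();
}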
@ -16,11 +16,11 @@
#include <ie_ngraph_utils.hpp>
#include <ie_algorithm.hpp>

#include "cldnn_engine.h"
#include "cldnn_executable_network.h"
#include "cldnn_transformations_pipeline.h"
#include "cldnn_custom_layer.h"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/plugin.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/transformations_pipeline.hpp"
#include "intel_gpu/plugin/custom_layer.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "gpu/gpu_config.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

@ -41,7 +41,9 @@ using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

#define FACTORY_DECLARATION(op_version, op_name) \
    void __register ## _ ## op_name ## _ ## op_version();
@ -50,20 +52,20 @@ namespace CLDNNPlugin {
    __register ## _ ## op_name ## _ ## op_version();

#define REGISTER_FACTORY(op_version, op_name) FACTORY_DECLARATION(op_version, op_name)
#include "cldnn_primitives_list.hpp"
#include "intel_gpu/plugin/primitives_list.hpp"
#undef REGISTER_FACTORY

void clDNNEngine::RegisterPrimitives() {
void Plugin::RegisterPrimitives() {
#define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name)
#include "cldnn_primitives_list.hpp"
#include "intel_gpu/plugin/primitives_list.hpp"
#undef REGISTER_FACTORY
}

struct clDNNEngine::impl {
    CLDNNPlugin::Configs m_configs;
struct Plugin::impl {
    Configs m_configs;
};

std::string clDNNEngine::GetDeviceIDFromConfig(const std::map<std::string, std::string>& config) const {
std::string Plugin::GetDeviceIDFromConfig(const std::map<std::string, std::string>& config) const {
    std::string device_id;
    if (config.find(PluginConfigParams::KEY_DEVICE_ID) != config.end()) {
        device_id = config.at(PluginConfigParams::KEY_DEVICE_ID);
@ -71,7 +73,7 @@ std::string clDNNEngine::GetDeviceIDFromConfig(const std::map<std::string, std::
    return device_id;
}

cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
cldnn::device_info Plugin::GetDeviceInfo(const std::map<std::string, std::string> &config) const {
    auto device_info = device_map.begin()->second->get_info();
    std::string device_id = GetDeviceIDFromConfig(config);
    if (!device_id.empty()) {
@ -84,9 +86,9 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::s
    return device_info;
}

InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
                                                                  const CLDNNPlugin::Config& config) const {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::CloneAndTransformNetwork");
InferenceEngine::CNNNetwork Plugin::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
                                                             const Config& config) const {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::CloneAndTransformNetwork");
    CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);

    if (clonedNetwork.getFunction()) {
@ -103,11 +105,11 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
    return clonedNetwork;
}

clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
Plugin::Plugin() : m_defaultContext(nullptr) {
    _pluginName = "GPU";
    _impl = std::make_shared<impl>();
    RegisterPrimitives();
    // try loading clDNN engine and get info from it
    // try loading gpu engine and get info from it
    {
        // Set OCL runtime which should be always available
        cldnn::device_query device_query(cldnn::engine_types::ocl, cldnn::runtime_types::ocl);
@ -124,12 +126,12 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
        CHAR mpath[MAX_PATH + 1];
        HMODULE nModule;
        GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                          (LPCSTR)CLDNNCustomLayer::LoadFromFile,
                          (LPCSTR)CustomLayer::LoadFromFile,
                          &nModule);
        GetModuleFileName(nModule, mpath, sizeof(mpath));
#elif __linux__
        Dl_info dl_info;
        dladdr(reinterpret_cast<void *>(CLDNNCustomLayer::LoadFromFile), &dl_info);
        dladdr(reinterpret_cast<void *>(CustomLayer::LoadFromFile), &dl_info);
        const char* mpath = dl_info.dli_fname;
#endif
        std::string configFile(mpath);
@ -142,7 +144,7 @@ clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
        }
        config_path += "/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml";
        for (auto& config : _impl->m_configs) {
            CLDNNCustomLayer::LoadFromFile(config_path, config.second.customLayers, true);
            CustomLayer::LoadFromFile(config_path, config.second.customLayers, true);
        }
    }

@ -164,8 +166,8 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
    }
};

void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateConfig");
void Plugin::UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateConfig");
    auto device_info = GetDeviceInfo(params);
    conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
    conf.UpdateFromMap(params);
@ -174,8 +176,8 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine:
    }
}

void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateStatistics");
void Plugin::UpdateStatistics(const RemoteCLContext::Ptr& context) const {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateStatistics");
    {
        std::lock_guard<std::mutex> lock(engine_mutex);

@ -193,9 +195,9 @@ void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) con
    }
}

std::map<std::string, std::string> clDNNEngine::ConvertPerfHintsToConfig(
std::map<std::string, std::string> Plugin::ConvertPerfHintsToConfig(
        const std::map<std::string, std::string>& network_config,
        const CLDNNPlugin::Config& plugin_config) const {
        const Config& plugin_config) const {
    // deduces the actual settings from the performance hints and returns fully-defined config
    auto config = network_config;
    const auto &mode = config.find(PluginConfigParams::KEY_PERFORMANCE_HINT);
@ -223,21 +225,21 @@ std::map<std::string, std::string> clDNNEngine::ConvertPerfHintsToConfig(
    return config;
}

IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                            const std::map<std::string, std::string> &orig_config) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl");
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl");
    // verification of supported input
    InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
    check_inputs(_networkInputs);

    CLDNNPlugin::Configs confs = _impl->m_configs;
    Configs confs = _impl->m_configs;
    std::string device_id = GetDeviceIDFromConfig(orig_config);
    CLDNNPlugin::Config conf = confs.GetConfig(device_id);
    Config conf = confs.GetConfig(device_id);

    auto config = ConvertPerfHintsToConfig(orig_config, conf);
    UpdateConfig(conf, network, config);

    CLDNNRemoteCLContext::Ptr context;
    RemoteCLContext::Ptr context;

    auto canReuseDefaultContext = [&]() -> bool {
        if (m_defaultContext == nullptr)
@ -263,10 +265,10 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
    };

    {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateContext");
        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateContext");
        std::lock_guard<std::mutex> lock(engine_mutex);
        if (!canReuseDefaultContext()) {
            m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), ParamMap(), conf));
            m_defaultContext.reset(new RemoteCLContext(shared_from_this(), ParamMap(), conf));
        }
    }

@ -274,16 +276,16 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE

    auto transformedNetwork = CloneAndTransformNetwork(network, conf);
    {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork");
        CLDNNExecNetwork::Ptr exeNetwork = std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork");
        CompiledModel::Ptr exeNetwork = std::make_shared<CompiledModel>(transformedNetwork, context, conf);
        UpdateStatistics(context);
        return exeNetwork;
    }
}

IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                                const RemoteContext::Ptr &context,
                                                                const std::map<std::string, std::string> &orig_config) {
IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
                                                            const InferenceEngine::RemoteContext::Ptr &context,
                                                            const std::map<std::string, std::string> &orig_config) {
    InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
    check_inputs(_networkInputs);

@ -292,39 +294,39 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE
        IE_THROW() << "Invalid context";
    }

    CLDNNPlugin::Config conf = getContextImpl(casted)->GetConfig();
    Config conf = getContextImpl(casted)->GetConfig();
    auto config = ConvertPerfHintsToConfig(orig_config, conf);
    UpdateConfig(conf, network, config);

    auto transformedNetwork = CloneAndTransformNetwork(network, conf);
    return std::make_shared<CLDNNExecNetwork>(transformedNetwork, casted, conf);
    return std::make_shared<CompiledModel>(transformedNetwork, casted, conf);
}

RemoteContext::Ptr clDNNEngine::CreateContext(const ParamMap& params) {
InferenceEngine::RemoteContext::Ptr Plugin::CreateContext(const ParamMap& params) {
    // parameter map is non-empty
    std::string contextTypeStr = _StrFromParams(params, GPU_PARAM_KEY(CONTEXT_TYPE));

    if (GPU_PARAM_VALUE(OCL) == contextTypeStr) {
        return std::make_shared<CLDNNRemoteCLContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
        return std::make_shared<RemoteCLContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
    } else if (GPU_PARAM_VALUE(VA_SHARED) == contextTypeStr) {
#ifdef _WIN32
        return std::make_shared<CLDNNRemoteD3DContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
        return std::make_shared<RemoteD3DContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
#else
        return std::make_shared<CLDNNRemoteVAContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
        return std::make_shared<RemoteVAContext>(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig());
#endif
    } else {
        IE_THROW() << "Invalid remote context type" << contextTypeStr;
    }
}

RemoteContext::Ptr clDNNEngine::GetDefaultContext(const ParamMap& params) {
InferenceEngine::RemoteContext::Ptr Plugin::GetDefaultContext(const ParamMap& params) {
    if (nullptr == m_defaultContext) {
        m_defaultContext.reset(new CLDNNRemoteCLContext(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()));
        m_defaultContext.reset(new RemoteCLContext(shared_from_this(), params, _impl->m_configs.GetDefaultDeviceConfig()));
    }
    return m_defaultContext;
}

void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
void Plugin::SetConfig(const std::map<std::string, std::string> &config) {
    streamsSet = (config.find(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) != config.end());
    throttlingSet = config.find(GPUConfigParams::KEY_GPU_PLUGIN_THROTTLE) != config.end() ||
                    config.find(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) != config.end();
@ -345,18 +347,18 @@ void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
    }
}

QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
                                             const std::map<std::string, std::string>& config) const {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::QueryNetwork");
QueryNetworkResult Plugin::QueryNetwork(const CNNNetwork& network,
                                        const std::map<std::string, std::string>& config) const {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::QueryNetwork");
    QueryNetworkResult res;
    CLDNNPlugin::Configs confs = _impl->m_configs;
    Configs confs = _impl->m_configs;
    std::string device_id = GetDeviceIDFromConfig(config);
    CLDNNPlugin::Config conf = confs.GetConfig(device_id);
    Config conf = confs.GetConfig(device_id);

    UpdateConfig(conf, network, config);

    if (m_defaultContext == nullptr) {
        m_defaultContext.reset(new CLDNNRemoteCLContext(
        m_defaultContext.reset(new RemoteCLContext(
            std::const_pointer_cast<InferenceEngine::IInferencePlugin>(shared_from_this()),
            ParamMap(), conf));
    }
@ -568,8 +570,8 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
    return res;
}

Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& options) const {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetConfig");
Parameter Plugin::GetConfig(const std::string& name, const std::map<std::string, Parameter>& options) const {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetConfig");
    Parameter result;

    std::string device_id;
@ -655,8 +657,8 @@ static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
    return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs;
}

Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetMetric");
    std::string device_id = GetConfig(CONFIG_KEY(DEVICE_ID), options);

    auto iter = device_map.find(device_id);
@ -813,7 +815,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st

        InferenceEngine::CNNNetwork network(model);
        size_t base_batch_size = 16; // empirically decided for DG1
        auto engine_params = clDNNEngine::GetEngineParams(config, iter->second, nullptr);
        auto engine_params = Plugin::GetParams(config, iter->second, nullptr);
        auto engine = cldnn::engine::create(engine_params.engine_type, engine_params.runtime_type, iter->second,
                                cldnn::engine_configuration(false, engine_params.queue_type, std::string(),
                                config.queuePriority, config.queueThrottle, config.memory_pool_on,
@ -835,7 +837,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
        for (auto& info : inputs_info) {
            if (!info.second)
                continue;
            Layout layout = info.second->getLayout();
            InferenceEngine::Layout layout = info.second->getLayout();
            auto data = info.second->getInputData();
            if (!data)
                continue;
@ -885,7 +887,9 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
        IE_THROW() << "Unsupported metric key " << name;
    }
}
}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

static const Version version = { {2, 1}, CI_BUILD_NUMBER, "clDNNPlugin" };
IE_DEFINE_PLUGIN_CREATE_FUNCTION(CLDNNPlugin::clDNNEngine, version)
static const Version version = { {2, 1}, CI_BUILD_NUMBER, "Intel GPU plugin" };
IE_DEFINE_PLUGIN_CREATE_FUNCTION(ov::runtime::intel_gpu::Plugin, version)
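The rename changes the plugin's namespace from CLDNNPlugin to ov::runtime::intel_gpu, and the create-function registration above now spells the new name. A hypothetical compatibility shim, not part of this commit, shows how code that still refers to the old namespace could keep compiling during a migration:

// Hypothetical migration aid (illustrative only): alias the old namespace to the new one.
namespace ov { namespace runtime { namespace intel_gpu {
class Plugin;   // forward declaration stands in for the real header
}}}  // namespace ov::runtime::intel_gpu

namespace CLDNNPlugin = ov::runtime::intel_gpu;

// After the alias, both spellings name the same type:
// CLDNNPlugin::Plugin and ov::runtime::intel_gpu::Plugin are interchangeable.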
@ -2,16 +2,18 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cldnn_program.h"
#include "intel_gpu/plugin/program.hpp"
#include "ngraph/ops.hpp"
#include "ngraph_ops/nms_ie_internal.hpp"
#include "cldnn_itt.h"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"

using namespace InferenceEngine;
using namespace InferenceEngine::details;

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

const cldnn::primitive_id Program::m_preProcessTag("_cldnn_input_preprocess");
const cldnn::primitive_id Program::m_meanValuesTag("_cldnn_mean_values");
@ -177,7 +179,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
                                                      InferenceEngine::InputsDataMap networkInputs,
                                                      InferenceEngine::OutputsDataMap networkOutputs,
                                                      bool createTopologyOnly, bool partialBuild) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::BuildProgram");
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::BuildProgram");
    cldnn::build_options options;

    if (!m_config.graph_dumps_dir.empty()) {
@ -196,7 +198,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
    if (createTopologyOnly) {
        return {};
    } else {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateProgram");
        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateProgram");
        auto program = cldnn::program::build_program(*m_engine, *m_topology, options);
        CleanupBuild();

@ -205,7 +207,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
}

bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::IsOpSupported");
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::IsOpSupported");
    cldnn::topology topology;
    try {
        // Query mode disables checks that input primitives are created,
@ -232,7 +234,7 @@ bool Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st
}

void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "Program::CreateSingleLayerPrimitive");
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateSingleLayerPrimitive");
    InitProfileInfo(op->get_friendly_name(), op->get_type_name());

    GPU_DEBUG_GET_INSTANCE(debug_config);
@ -355,4 +357,6 @@ bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
    return is_const_node(node);
}

} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -3,19 +3,21 @@
//

#include <memory>
#include "cldnn_remote_context.h"
#include "cldnn_itt.h"
#include "cldnn_engine.h"
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/plugin/plugin.hpp"
#include "intel_gpu/runtime/device_query.hpp"

using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;

namespace CLDNNPlugin {
CLDNNRemoteAllocator CLDNNRemoteBlobImpl::m_allocator;
namespace ov {
namespace runtime {
namespace intel_gpu {
RemoteAllocator RemoteBlobImpl::m_allocator;

CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
RemoteBlobImpl::RemoteBlobImpl(ClContext::Ptr context,
    cldnn::stream& stream,
    const cldnn::layout& layout,
    cldnn::shared_handle mem,
@ -26,7 +28,7 @@ CLDNNRemoteBlobImpl::CLDNNRemoteBlobImpl(ClContext::Ptr context,
    _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) {
}

ParamMap CLDNNRemoteBlobImpl::getParams() const {
ParamMap RemoteBlobImpl::getParams() const {
    assert(m_memObject != nullptr);
    auto params = m_memObject->get_internal_params();

@ -86,21 +88,21 @@ ParamMap CLDNNRemoteBlobImpl::getParams() const {
    }
}

bool CLDNNRemoteBlobImpl::deallocate() noexcept {
bool RemoteBlobImpl::deallocate() noexcept {
    m_memObject.reset();
    return m_memObject == nullptr;
}

bool CLDNNRemoteBlobImpl::is_allocated() const noexcept {
bool RemoteBlobImpl::is_allocated() const noexcept {
    return m_memObject != nullptr;
}

bool CLDNNRemoteBlobImpl::is_locked() const noexcept {
bool RemoteBlobImpl::is_locked() const noexcept {
    return lockedHolder != nullptr;
}

void CLDNNRemoteBlobImpl::allocate() {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "CLDNNRemoteBlobImpl::Allocate");
void RemoteBlobImpl::allocate() {
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "RemoteBlobImpl::Allocate");
    assert(m_memObject == nullptr);

    auto _impl = getContextImpl(m_context.lock());
@ -153,22 +155,22 @@ void CLDNNRemoteBlobImpl::allocate() {
    _impl->release_lock();
}

const std::shared_ptr<IAllocator>& CLDNNRemoteBlobImpl::getAllocator() const noexcept {
const std::shared_ptr<IAllocator>& RemoteBlobImpl::getAllocator() const noexcept {
    if (!_allocator) {
        _allocator = std::shared_ptr<IAllocator>(&m_allocator, [] (IAllocator*) {});
    }
    return _allocator;
};

std::string CLDNNRemoteBlobImpl::getDeviceName() const noexcept {
std::string RemoteBlobImpl::getDeviceName() const noexcept {
    return getContextImpl(m_context.lock())->getDeviceName();
};

std::shared_ptr<RemoteContext> CLDNNRemoteBlobImpl::getContext() const noexcept {
std::shared_ptr<InferenceEngine::RemoteContext> RemoteBlobImpl::getContext() const noexcept {
    return m_context.lock();
}

void CLDNNRemoteBlobImpl::lock() const {
void RemoteBlobImpl::lock() const {
    if (!is_allocated()) {
        IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated";
    }
@ -178,36 +180,36 @@ void CLDNNRemoteBlobImpl::lock() const {
    m_allocator.regLockedBlob(_handle, this);
}

void CLDNNRemoteBlobImpl::unlock() const {
void RemoteBlobImpl::unlock() const {
    lockedHolder.reset();
}

LockedMemory<void> CLDNNRemoteBlobImpl::buffer() noexcept {
LockedMemory<void> RemoteBlobImpl::buffer() noexcept {
    lock();
    return LockedMemory<void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
}

LockedMemory<const void> CLDNNRemoteBlobImpl::cbuffer() const noexcept {
LockedMemory<const void> RemoteBlobImpl::cbuffer() const noexcept {
    lock();
    return LockedMemory<const void>(reinterpret_cast<IAllocator*>(&m_allocator), _handle, 0);
}

LockedMemory<void> CLDNNRemoteBlobImpl::rwmap()noexcept {
LockedMemory<void> RemoteBlobImpl::rwmap()noexcept {
    lock();
    return LockedMemory<void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
}

LockedMemory<const void> CLDNNRemoteBlobImpl::rmap() const noexcept {
LockedMemory<const void> RemoteBlobImpl::rmap() const noexcept {
    lock();
    return LockedMemory<const void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
}

LockedMemory<void> CLDNNRemoteBlobImpl::wmap()noexcept {
LockedMemory<void> RemoteBlobImpl::wmap()noexcept {
    lock();
    return LockedMemory<void>(reinterpret_cast<IAllocator *>(&m_allocator), _handle, 0);
}

void CLDNNRemoteAllocator::regLockedBlob(void* handle, const CLDNNRemoteBlobImpl* blob) {
void RemoteAllocator::regLockedBlob(void* handle, const RemoteBlobImpl* blob) {
    acquire_lock();
    auto iter = m_lockedBlobs.find(handle);
    if (iter == m_lockedBlobs.end()) {
@ -216,7 +218,7 @@ void CLDNNRemoteAllocator::regLockedBlob(void* handle, const CLDNNRemoteBlobImpl
    release_lock();
}

void CLDNNRemoteAllocator::unlock(void* handle) noexcept {
void RemoteAllocator::unlock(void* handle) noexcept {
    acquire_lock();
    auto iter = m_lockedBlobs.find(handle);
    if (iter != m_lockedBlobs.end()) {
@ -226,7 +228,7 @@ void CLDNNRemoteAllocator::unlock(void* handle) noexcept {
    release_lock();
}

CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
ExecutionContextImpl::ExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
                                           const ParamMap& params,
                                           const Config& config) :
    m_plugin(plugin),
@ -278,7 +280,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
        (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_tune_and_cache) ||
        (m_config.tuningConfig.mode == cldnn::tuning_mode::tuning_retune_and_cache));

    auto engine_params = clDNNEngine::GetEngineParams(m_config, dev, m_external_queue);
    auto engine_params = Plugin::GetParams(m_config, dev, m_external_queue);
    m_engine = cldnn::engine::create(engine_params.engine_type,
                                     engine_params.runtime_type, dev,
                                     cldnn::engine_configuration(enable_profiling,
@ -293,7 +295,7 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInfe
                                     engine_params.task_executor);
}

ParamMap CLDNNExecutionContextImpl::getParams() const {
ParamMap ExecutionContextImpl::getParams() const {
    ParamMap ret = { { GPU_PARAM_KEY(OCL_CONTEXT), m_engine->get_user_context() } };

    switch (m_type) {
@ -312,7 +314,7 @@ ParamMap CLDNNExecutionContextImpl::getParams() const {
    return ret;
}

std::string CLDNNExecutionContextImpl::getDeviceName() const noexcept {
std::string ExecutionContextImpl::getDeviceName() const noexcept {
    auto devName = m_plugin.lock()->GetName();

    auto engine_type = cldnn::engine_types::ocl;
@ -334,4 +336,6 @@ std::string CLDNNExecutionContextImpl::getDeviceName() const noexcept {
    return devName;
}

}; // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov
@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "simple_math.h"
#include "intel_gpu/plugin/simple_math.hpp"
#include <cctype>
#include <string>
#include <set>
@ -12,7 +12,7 @@
#include <cctype>
#include <memory>

#include "cldnn_transformations_pipeline.h"
#include "intel_gpu/plugin/transformations_pipeline.hpp"

#include "ie_metric_helpers.hpp"
#include "ie_plugin_config.hpp"
@ -86,7 +86,7 @@
#include <low_precision/strided_slice.hpp>
#include <low_precision/network_helper.hpp>

#include "cldnn_itt.h"
#include "intel_gpu/plugin/itt.hpp"

namespace {
template<typename T>
@ -99,10 +99,12 @@ static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node>
}
} // namespace

namespace CLDNNPlugin {
namespace ov {
namespace runtime {
namespace intel_gpu {

void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
    OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply");
    OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply");
    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;

    bool use_onednn = false;
@ -336,7 +338,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
    }

    if (enableInt8) {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::lpt");
        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::lpt");
        using namespace ngraph::pass::low_precision;

        // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
@ -421,7 +423,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
    }

    {
        OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::run_passes");
        OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply::run_passes");
        ngraph::pass::Manager manager;
        // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
        // TODO: check why we have these reshapes
@ -442,4 +444,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Function> func) {
        manager.run_passes(func);
    }
}
} // namespace CLDNNPlugin
} // namespace intel_gpu
} // namespace runtime
} // namespace ov