Enable IE clang format (#6938)
* Add clang-format config files for IE and include files * Fixed IE core clang-format * Added clang-format for plugin API * Fixed style for plugin API * Fixed code style * Fixed build * Added proposed values * Fixed code style * Updated config * Fixed # for define * Fixed comment and build * Removed clang-config for include, fixed include for library headers * Set column limit to 120 and space before CtorInitializedColon=true * Fixed headers * Added support of FOREACH_CHILD * Fixed parameter print style * Fixed code style * Fixed target name * Restore comments style for public API * Fixed plugin API * Applied code style * Fixed CI
This commit is contained in:
parent
cc5dba95d4
commit
e3fa6544f2
28
inference-engine/src/inference_engine/.clang-format
Normal file
28
inference-engine/src/inference_engine/.clang-format
Normal file
@ -0,0 +1,28 @@
|
||||
BasedOnStyle: Google
|
||||
IndentWidth: 4
|
||||
UseTab: Never
|
||||
ColumnLimit: 120
|
||||
|
||||
Language: Cpp
|
||||
Standard: Cpp11
|
||||
|
||||
AccessModifierOffset: -4
|
||||
AlignConsecutiveMacros: true
|
||||
AllowAllArgumentsOnNextLine: false
|
||||
AllowAllConstructorInitializersOnNextLine: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: false
|
||||
AllowShortFunctionsOnASingleLine: Empty
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLambdasOnASingleLine: Empty
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakBeforeMultilineStrings: false
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
CommentPragmas: '^#'
|
||||
DerivePointerAlignment: false
|
||||
FixNamespaceComments: true
|
||||
IndentCaseLabels: false
|
||||
IndentPPDirectives: AfterHash
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- FOREACH_CHILD
|
@ -106,7 +106,7 @@ set_ie_threading_interface_for(${TARGET_NAME}_plugin_api)
|
||||
file(GLOB_RECURSE plugin_api_src "${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.hpp"
|
||||
"${IE_MAIN_SOURCE_DIR}/src/plugin_api/*.h")
|
||||
|
||||
add_cpplint_target(${TARGET_NAME}_plugin_api_cpplint FOR_SOURCES ${plugin_api_src})
|
||||
add_clang_format_target(${TARGET_NAME}_plugin_api_clang FOR_SOURCES ${plugin_api_src})
|
||||
|
||||
# Create object library
|
||||
|
||||
@ -142,7 +142,7 @@ if (TBBBIND_2_4_FOUND)
|
||||
target_link_libraries(${TARGET_NAME}_obj PRIVATE ${TBBBIND_2_4_IMPORTED_TARGETS})
|
||||
endif()
|
||||
|
||||
add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj)
|
||||
add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}_obj)
|
||||
|
||||
# Create shared library file from object library
|
||||
|
||||
|
@ -10,9 +10,9 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_api.h"
|
||||
#include "gpu/gpu_config.hpp"
|
||||
#include "ie_api.h"
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -24,8 +24,8 @@ namespace CLDNNConfigParams {
|
||||
/**
|
||||
* @brief shortcut for defining configuration keys
|
||||
*/
|
||||
#define CLDNN_CONFIG_KEY(name) InferenceEngine::CLDNNConfigParams::_CONFIG_KEY(CLDNN_##name)
|
||||
#define DECLARE_CLDNN_CONFIG_KEY(name) DECLARE_CONFIG_KEY(CLDNN_##name)
|
||||
#define CLDNN_CONFIG_KEY(name) InferenceEngine::CLDNNConfigParams::_CONFIG_KEY(CLDNN_##name)
|
||||
#define DECLARE_CLDNN_CONFIG_KEY(name) DECLARE_CONFIG_KEY(CLDNN_##name)
|
||||
#define DECLARE_CLDNN_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(CLDNN_##name)
|
||||
|
||||
/**
|
||||
@ -67,9 +67,10 @@ DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR);
|
||||
|
||||
/**
|
||||
* @brief This key enables FP16 precision for quantized models.
|
||||
* By default the model is converted to FP32 precision before running LPT. If this key is enabled (default), then non-quantized layers
|
||||
* will be converted back to FP16 after LPT, which might imrpove the performance if a model has a lot of compute operations in
|
||||
* non-quantized path. This key has no effect if current device doesn't have INT8 optimization capabilities.
|
||||
* By default the model is converted to FP32 precision before running LPT. If this key is enabled (default), then
|
||||
* non-quantized layers will be converted back to FP16 after LPT, which might imrpove the performance if a model has a
|
||||
* lot of compute operations in non-quantized path. This key has no effect if current device doesn't have INT8
|
||||
* optimization capabilities.
|
||||
*/
|
||||
DECLARE_CLDNN_CONFIG_KEY(ENABLE_FP16_FOR_QUANTIZED_MODELS);
|
||||
|
||||
|
@ -15,12 +15,12 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_icnn_network.hpp"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_common.h"
|
||||
#include "ie_data.h"
|
||||
#include "ie_extension.h"
|
||||
#include <ngraph/function.hpp>
|
||||
#include "ie_icnn_network.hpp"
|
||||
#include "ngraph/function.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -52,8 +52,7 @@ public:
|
||||
* @param network Pointer to the ngraph::Function object
|
||||
* @param exts Vector of pointers to IE extension objects
|
||||
*/
|
||||
explicit CNNNetwork(const std::shared_ptr<ngraph::Function>& network,
|
||||
const std::vector<IExtensionPtr>& exts = {});
|
||||
explicit CNNNetwork(const std::shared_ptr<ngraph::Function>& network, const std::vector<IExtensionPtr>& exts = {});
|
||||
|
||||
/**
|
||||
* @brief Gets the network output Data node information. The received info is stored in the given Data node.
|
||||
|
@ -10,18 +10,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_parameter.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "cpp/ie_infer_request.hpp"
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_iexecutable_network.hpp"
|
||||
#include "ie_parameter.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
@ -36,16 +36,16 @@ class IExecutableNetworkInternal;
|
||||
* @brief This is an interface of an executable network
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(ExecutableNetwork) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IExecutableNetworkInternal> _impl;
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IExecutableNetworkInternal> _impl;
|
||||
|
||||
/**
|
||||
* @brief Constructs ExecutableNetwork from the initialized std::shared_ptr
|
||||
* @param so Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin object is destroyed.
|
||||
* @param so Plugin to use. This is required to ensure that ExecutableNetwork can work properly even if plugin
|
||||
* object is destroyed.
|
||||
* @param impl Initialized shared pointer
|
||||
*/
|
||||
ExecutableNetwork(const details::SharedObjectLoader& so,
|
||||
const std::shared_ptr<IExecutableNetworkInternal>& impl);
|
||||
ExecutableNetwork(const details::SharedObjectLoader& so, const std::shared_ptr<IExecutableNetworkInternal>& impl);
|
||||
friend class Core;
|
||||
friend class ov::runtime::Core;
|
||||
|
||||
|
@ -13,10 +13,10 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ie_blob.h"
|
||||
#include "cpp/ie_memory_state.hpp"
|
||||
#include "ie_iinfer_request.hpp"
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_iinfer_request.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -33,16 +33,16 @@ class ICompletionCallbackWrapper;
|
||||
* It can throw exceptions safely for the application, where it is properly handled.
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(InferRequest) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IInferRequestInternal> _impl;
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IInferRequestInternal> _impl;
|
||||
|
||||
/**
|
||||
* @brief Constructs InferRequest from the initialized std::shared_ptr
|
||||
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is destroyed.
|
||||
* @param so Plugin to use. This is required to ensure that InferRequest can work properly even if plugin object is
|
||||
* destroyed.
|
||||
* @param impl Initialized shared pointer
|
||||
*/
|
||||
InferRequest(const details::SharedObjectLoader& so,
|
||||
const std::shared_ptr<IInferRequestInternal>& impl);
|
||||
InferRequest(const details::SharedObjectLoader& so, const std::shared_ptr<IInferRequestInternal>& impl);
|
||||
friend class ExecutableNetwork;
|
||||
|
||||
public:
|
||||
@ -93,7 +93,7 @@ public:
|
||||
* @param data A reference to input. The type of Blob must correspond to the network input precision and size.
|
||||
* @param info Preprocess info for blob.
|
||||
*/
|
||||
void SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info);
|
||||
void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info);
|
||||
|
||||
/**
|
||||
* @brief Gets pre-process for input data
|
||||
@ -176,9 +176,11 @@ private:
|
||||
void SetCompletionCallbackImpl(IInferRequest::CompletionCallback);
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
template<typename T>
|
||||
template <typename T>
|
||||
struct SetCallback {
|
||||
void operator()(std::function<void()> f) {_this.SetCompletionCallbackImpl(std::move(f));}
|
||||
void operator()(std::function<void()> f) {
|
||||
_this.SetCompletionCallbackImpl(std::move(f));
|
||||
}
|
||||
InferRequest& _this;
|
||||
};
|
||||
|
||||
@ -188,7 +190,7 @@ public:
|
||||
*
|
||||
* @param callbackToSet callback object which will be called on when inference finish.
|
||||
*/
|
||||
template<typename F>
|
||||
template <typename F>
|
||||
void SetCompletionCallback(F callbackToSet) {
|
||||
SetCallback<F>{*this}(std::move(callbackToSet));
|
||||
}
|
||||
@ -207,7 +209,7 @@ public:
|
||||
* @return A shared pointer to IInferRequest interface
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Will be removed")
|
||||
operator std::shared_ptr<IInferRequest> ();
|
||||
operator std::shared_ptr<IInferRequest>();
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/**
|
||||
@ -238,7 +240,7 @@ public:
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
template<>
|
||||
template <>
|
||||
struct InferRequest::SetCallback<std::function<void(InferRequest, StatusCode)>> {
|
||||
void operator()(std::function<void(InferRequest, StatusCode)> f) {
|
||||
_this.SetCompletionCallbackImpl(std::move(f));
|
||||
@ -251,7 +253,7 @@ IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
template<>
|
||||
template <>
|
||||
struct InferRequest::SetCallback<IInferRequest::CompletionCallback> {
|
||||
void operator()(IInferRequest::CompletionCallback f) {
|
||||
_this.SetCompletionCallbackImpl(std::move(f));
|
||||
|
@ -10,12 +10,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "ie_api.h"
|
||||
#include "ie_blob.h"
|
||||
#include "details/ie_so_loader.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -25,16 +25,16 @@ class IVariableStateInternal;
|
||||
* @brief VariableState class
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(VariableState) {
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IVariableStateInternal> _impl;
|
||||
details::SharedObjectLoader _so;
|
||||
std::shared_ptr<IVariableStateInternal> _impl;
|
||||
|
||||
/**
|
||||
* @brief Constructs VariableState from the initialized std::shared_ptr
|
||||
* @param impl Initialized shared pointer
|
||||
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin object is destroyed.
|
||||
* @param so Optional: Plugin to use. This is required to ensure that VariableState can work properly even if plugin
|
||||
* object is destroyed.
|
||||
*/
|
||||
VariableState(const details::SharedObjectLoader& so,
|
||||
const std::shared_ptr<IVariableStateInternal>& impl);
|
||||
VariableState(const details::SharedObjectLoader& so, const std::shared_ptr<IVariableStateInternal>& impl);
|
||||
friend class InferRequest;
|
||||
friend class ExecutableNetwork;
|
||||
|
||||
@ -52,7 +52,7 @@ public:
|
||||
|
||||
/**
|
||||
* @brief Gets name of current variable state, if length of array is not enough name is truncated by len, null
|
||||
* terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
|
||||
* terminator is inserted as well. As variable state name `variable_id` from according `ReadValue` used.
|
||||
* @return A string representing a state name
|
||||
*/
|
||||
std::string GetName() const;
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
/**
|
||||
* @brief A header file for the BlobIterator class
|
||||
*
|
||||
*
|
||||
* @file ie_blob_iterator.hpp
|
||||
*/
|
||||
|
||||
@ -31,7 +31,7 @@ public:
|
||||
* @param lk Rvalue of the memory instance to move from
|
||||
* @param offset Size of offset in memory
|
||||
*/
|
||||
explicit BlobIterator(LockedMemory<T>&& lk, size_t offset = 0): _mem(std::move(lk)), _offset(offset) {}
|
||||
explicit BlobIterator(LockedMemory<T>&& lk, size_t offset = 0) : _mem(std::move(lk)), _offset(offset) {}
|
||||
|
||||
/**
|
||||
* @brief Increments an offset of the current BlobIterator instance
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
/**
|
||||
* @brief The header file defines utility PreAllocator class
|
||||
*
|
||||
*
|
||||
* @file ie_pre_allocator.hpp
|
||||
*/
|
||||
#pragma once
|
||||
@ -23,7 +23,7 @@ class PreAllocator final : public IAllocator {
|
||||
size_t _sizeInBytes;
|
||||
|
||||
public:
|
||||
PreAllocator(void* ptr, size_t bytes_size): _actualData(ptr), _sizeInBytes(bytes_size) {}
|
||||
PreAllocator(void* ptr, size_t bytes_size) : _actualData(ptr), _sizeInBytes(bytes_size) {}
|
||||
/**
|
||||
* @brief Locks a handle to heap memory accessible by any memory manipulation routines
|
||||
* @return The generic pointer to a memory buffer
|
||||
|
@ -41,7 +41,7 @@ public:
|
||||
* @brief Loads a library with the name specified.
|
||||
* @param pluginName Full or relative path to the plugin library
|
||||
*/
|
||||
explicit SharedObjectLoader(const char * pluginName);
|
||||
explicit SharedObjectLoader(const char* pluginName);
|
||||
|
||||
/**
|
||||
* @brief A destructor
|
||||
|
@ -9,10 +9,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <functional>
|
||||
|
||||
#include "ie_common.h"
|
||||
#include "ie_so_loader.h"
|
||||
@ -31,7 +31,8 @@ class SOCreatorTrait {};
|
||||
* @tparam C A char type
|
||||
*/
|
||||
template <typename C>
|
||||
using enableIfSupportedChar = typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type;
|
||||
using enableIfSupportedChar =
|
||||
typename std::enable_if<(std::is_same<C, char>::value || std::is_same<C, wchar_t>::value)>::type;
|
||||
|
||||
/**
|
||||
* @brief This class instantiate object using shared library
|
||||
@ -44,8 +45,10 @@ class SOPointer {
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
struct HasRelease {
|
||||
template <typename C> static char test(decltype(&C::Release));
|
||||
template <typename C> static long test(...);
|
||||
template <typename C>
|
||||
static char test(decltype(&C::Release));
|
||||
template <typename C>
|
||||
static long test(...);
|
||||
constexpr static const bool value = sizeof(test<T>(nullptr)) == sizeof(char);
|
||||
};
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
@ -60,10 +63,8 @@ public:
|
||||
* @brief The main constructor
|
||||
* @param name Name of a shared library file
|
||||
*/
|
||||
template <typename C,
|
||||
typename = enableIfSupportedChar<C>>
|
||||
SOPointer(const std::basic_string<C> & name)
|
||||
: _so(name.c_str()) {
|
||||
template <typename C, typename = enableIfSupportedChar<C>>
|
||||
SOPointer(const std::basic_string<C>& name) : _so(name.c_str()) {
|
||||
Load(std::integral_constant<bool, HasRelease::value>{});
|
||||
}
|
||||
|
||||
@ -78,8 +79,7 @@ public:
|
||||
* @brief Constructs an object with existing loader
|
||||
* @param so Existing pointer to a library loader
|
||||
*/
|
||||
explicit SOPointer(const SharedObjectLoader& so)
|
||||
: _so(so) {
|
||||
explicit SOPointer(const SharedObjectLoader& so) : _so(so) {
|
||||
Load(std::integral_constant<bool, HasRelease::value>{});
|
||||
}
|
||||
|
||||
@ -88,9 +88,8 @@ public:
|
||||
* @param that copied SOPointer object
|
||||
*/
|
||||
template <typename U>
|
||||
SOPointer(const SOPointer<U>& that)
|
||||
: _so(that._so),
|
||||
_ptr(std::dynamic_pointer_cast<T>(that._ptr)) {
|
||||
SOPointer(const SOPointer<U>& that) : _so(that._so),
|
||||
_ptr(std::dynamic_pointer_cast<T>(that._ptr)) {
|
||||
IE_ASSERT(_ptr != nullptr);
|
||||
}
|
||||
|
||||
@ -123,7 +122,7 @@ public:
|
||||
return _so;
|
||||
}
|
||||
|
||||
operator std::shared_ptr<T>& () noexcept {
|
||||
operator std::shared_ptr<T>&() noexcept {
|
||||
return _ptr;
|
||||
}
|
||||
|
||||
@ -136,7 +135,8 @@ protected:
|
||||
void* create = nullptr;
|
||||
try {
|
||||
create = _so.get_symbol((SOCreatorTrait<T>::name + std::string("Shared")).c_str());
|
||||
} catch (const NotFound&) {}
|
||||
} catch (const NotFound&) {
|
||||
}
|
||||
if (create == nullptr) {
|
||||
create = _so.get_symbol(SOCreatorTrait<T>::name);
|
||||
using CreateF = StatusCode(T*&, ResponseDesc*);
|
||||
@ -144,17 +144,23 @@ protected:
|
||||
ResponseDesc desc;
|
||||
StatusCode sts = reinterpret_cast<CreateF*>(create)(object, &desc);
|
||||
if (sts != OK) {
|
||||
IE_EXCEPTION_SWITCH(sts, ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << desc.msg)
|
||||
IE_EXCEPTION_SWITCH(sts,
|
||||
ExceptionType,
|
||||
InferenceEngine::details::ThrowNow<ExceptionType>{} <<=
|
||||
std::stringstream{} << IE_LOCATION << desc.msg)
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
_ptr = std::shared_ptr<T>(object, [] (T* ptr){ptr->Release();});
|
||||
_ptr = std::shared_ptr<T>(object, [](T* ptr) {
|
||||
ptr->Release();
|
||||
});
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
} else {
|
||||
using CreateF = void(std::shared_ptr<T>&);
|
||||
reinterpret_cast<CreateF*>(create)(_ptr);
|
||||
}
|
||||
} catch(...) {details::Rethrow();}
|
||||
} catch (...) {
|
||||
details::Rethrow();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -164,7 +170,9 @@ protected:
|
||||
try {
|
||||
using CreateF = void(std::shared_ptr<T>&);
|
||||
reinterpret_cast<CreateF*>(_so.get_symbol(SOCreatorTrait<T>::name))(_ptr);
|
||||
} catch(...) {details::Rethrow();}
|
||||
} catch (...) {
|
||||
details::Rethrow();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -31,36 +31,36 @@ namespace GNAConfigParams {
|
||||
*/
|
||||
#define GNA_CONFIG_VALUE(name) InferenceEngine::GNAConfigParams::GNA_##name
|
||||
|
||||
#define DECLARE_GNA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GNA_##name)
|
||||
#define DECLARE_GNA_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GNA_##name)
|
||||
#define DECLARE_GNA_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GNA_##name)
|
||||
|
||||
/**
|
||||
* @brief Scale factor that is calculated by user, in order to use static quantisation feature
|
||||
* This option should be used with floating point value serialized to string with decimal separator equals to . (dot)
|
||||
* @details For multiple input case, individual scale factors can be passed, using KEY_GNA_SCALE_FACTOR[_input_layer_name]
|
||||
* where input_layer can be obtained from from CNNNetwork::GetInputsInfo
|
||||
*/
|
||||
* @brief Scale factor that is calculated by user, in order to use static quantisation feature
|
||||
* This option should be used with floating point value serialized to string with decimal separator equals to . (dot)
|
||||
* @details For multiple input case, individual scale factors can be passed, using
|
||||
* KEY_GNA_SCALE_FACTOR[_input_layer_name] where input_layer can be obtained from from CNNNetwork::GetInputsInfo
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
|
||||
|
||||
/**
|
||||
* @brief By default gna api works with Int16 weights precision, however this can be adjusted if necessary,
|
||||
* currently supported values are I16, I8
|
||||
*/
|
||||
* @brief By default gna api works with Int16 weights precision, however this can be adjusted if necessary,
|
||||
* currently supported values are I16, I8
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(PRECISION);
|
||||
|
||||
/**
|
||||
* @brief if turned on, dump GNA firmware model into specified file
|
||||
*/
|
||||
* @brief if turned on, dump GNA firmware model into specified file
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE);
|
||||
|
||||
/**
|
||||
* @brief information on GNA generation chosen for firmware model dump, can be overridden by GNA3
|
||||
*/
|
||||
* @brief information on GNA generation chosen for firmware model dump, can be overridden by GNA3
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE_GENERATION);
|
||||
|
||||
/**
|
||||
* @brief GNA proc_type setting that should be one of GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT
|
||||
*/
|
||||
* @brief GNA proc_type setting that should be one of GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
|
||||
|
||||
DECLARE_GNA_CONFIG_VALUE(AUTO);
|
||||
@ -79,62 +79,63 @@ DECLARE_GNA_CONFIG_VALUE(AVX2);
|
||||
DECLARE_GNA_CONFIG_VALUE(AVX2_EXACT);
|
||||
|
||||
/**
|
||||
* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
|
||||
* By default (in case of no value set) the behavior depends on GNA HW availability:
|
||||
* If GNA HW is present, use the option corresponding to this HW.
|
||||
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
|
||||
* A fully supported GNA HW generation means it must be supported by booth the OV GNA Plugin and the core GNA Library.
|
||||
* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0.
|
||||
* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
|
||||
* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
|
||||
*/
|
||||
* @brief The option to override the GNA HW execution target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
|
||||
* By default (in case of no value set) the behavior depends on GNA HW availability:
|
||||
* If GNA HW is present, use the option corresponding to this HW.
|
||||
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
|
||||
* A fully supported GNA HW generation means it must be supported by booth the OV GNA Plugin and the core GNA Library.
|
||||
* For the GNA Library 2.0.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_2_0.
|
||||
* For the GNA Library 2.1.X.Y, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
|
||||
* For the OV GNA Plugin 2021.4, the latest supported GNA HW generation corresponds to GNA_TARGET_3_0.
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(EXEC_TARGET);
|
||||
|
||||
DECLARE_GNA_CONFIG_VALUE(TARGET_2_0);
|
||||
DECLARE_GNA_CONFIG_VALUE(TARGET_3_0);
|
||||
|
||||
/**
|
||||
* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
|
||||
* By default the same as GNA_EXEC_TARGET.
|
||||
*/
|
||||
* @brief The option to override the GNA HW compile target. May be one of GNA_TARGET_2_0, GNA_TARGET_3_0.
|
||||
* By default the same as GNA_EXEC_TARGET.
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(COMPILE_TARGET);
|
||||
|
||||
/**
|
||||
* @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES
|
||||
*/
|
||||
* @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(COMPACT_MODE);
|
||||
|
||||
/**
|
||||
* @brief The option to enable/disable uniformly distributed PWL algorithm.
|
||||
* By default (in case of NO value set) the optimized algorithm called "Recursive Descent Algorithm for Finding
|
||||
* the Optimal Minimax Piecewise Linear Approximation of Convex Functions is used.
|
||||
* If value is YES then simple uniform distribution used to create PWL approximation of activation functions
|
||||
* Uniform distribution usually gives poor approximation with same number of segments
|
||||
*/
|
||||
* @brief The option to enable/disable uniformly distributed PWL algorithm.
|
||||
* By default (in case of NO value set) the optimized algorithm called "Recursive Descent Algorithm for Finding
|
||||
* the Optimal Minimax Piecewise Linear Approximation of Convex Functions is used.
|
||||
* If value is YES then simple uniform distribution used to create PWL approximation of activation functions
|
||||
* Uniform distribution usually gives poor approximation with same number of segments
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN);
|
||||
|
||||
/**
|
||||
* @brief The option to allow to specify the maximum error percent that the optimized algorithm finding
|
||||
* will use to find PWL functions.
|
||||
* By default (in case of NO value set), 1.0 value is used.
|
||||
*/
|
||||
* @brief The option to allow to specify the maximum error percent that the optimized algorithm finding
|
||||
* will use to find PWL functions.
|
||||
* By default (in case of NO value set), 1.0 value is used.
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
|
||||
|
||||
/**
|
||||
* @brief By default, the GNA plugin uses one worker thread for inference computations.
|
||||
* This parameter allows you to create up to 127 threads for software modes.
|
||||
*
|
||||
* Note that multithreading mode does not guarantee the same computation order as order
|
||||
* of issuing. Additionally, in this case, software modes do not implement any serializations.
|
||||
*/
|
||||
* @brief By default, the GNA plugin uses one worker thread for inference computations.
|
||||
* This parameter allows you to create up to 127 threads for software modes.
|
||||
*
|
||||
* Note that multithreading mode does not guarantee the same computation order as order
|
||||
* of issuing. Additionally, in this case, software modes do not implement any serializations.
|
||||
*/
|
||||
DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS);
|
||||
} // namespace GNAConfigParams
|
||||
|
||||
namespace Metrics {
|
||||
/**
|
||||
* @brief Metric to get a std::string of GNA Library version, usually in the form <API_REVISION>.<RELEASE_LINE>.<RELEASE>.<BUILD>
|
||||
*/
|
||||
DECLARE_METRIC_KEY(GNA_LIBRARY_FULL_VERSION, std::string);
|
||||
/**
|
||||
* @brief Metric to get a std::string of GNA Library version, usually in the form
|
||||
* <API_REVISION>.<RELEASE_LINE>.<RELEASE>.<BUILD>
|
||||
*/
|
||||
DECLARE_METRIC_KEY(GNA_LIBRARY_FULL_VERSION, std::string);
|
||||
} // namespace Metrics
|
||||
|
||||
namespace PluginConfigParams {
|
||||
|
@ -11,27 +11,29 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ie_parameter.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
namespace gpu {
|
||||
|
||||
namespace details {
|
||||
/**
|
||||
* @brief This wrapper class is used to obtain low-level handles
|
||||
* from remote blob or context object parameters.
|
||||
*/
|
||||
* @brief This wrapper class is used to obtain low-level handles
|
||||
* from remote blob or context object parameters.
|
||||
*/
|
||||
class param_map_obj_getter {
|
||||
protected:
|
||||
/**
|
||||
* @brief Template function that returns specified
|
||||
* object parameter typecasted to desired user type
|
||||
*/
|
||||
* @brief Template function that returns specified
|
||||
* object parameter typecasted to desired user type
|
||||
*/
|
||||
template <typename Result, typename Tmp>
|
||||
Result _ObjFromParams(const ParamMap& params,
|
||||
const std::string& handle_Key,
|
||||
const std::string& type_Key,
|
||||
const std::string& obj_T1,
|
||||
const std::string& obj_T2 = "__") const {
|
||||
const std::string& handle_Key,
|
||||
const std::string& type_Key,
|
||||
const std::string& obj_T1,
|
||||
const std::string& obj_T2 = "__") const {
|
||||
auto itrType = params.find(type_Key);
|
||||
if (itrType == params.end())
|
||||
IE_THROW() << "Parameter of type " << type_Key << " not found";
|
||||
@ -50,9 +52,9 @@ protected:
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Same as _ObjFromParams(), but should be used if check
|
||||
* for object type is not required
|
||||
*/
|
||||
* @brief Same as _ObjFromParams(), but should be used if check
|
||||
* for object type is not required
|
||||
*/
|
||||
template <typename Result>
|
||||
Result _ObjFromParamSimple(const ParamMap& params, const std::string& handle_Key) const {
|
||||
auto itrHandle = params.find(handle_Key);
|
||||
@ -65,11 +67,10 @@ protected:
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Template function that extracts string value
|
||||
* from map entry under specified key
|
||||
*/
|
||||
std::string _StrFromParams(const ParamMap& params,
|
||||
std::string Key) const {
|
||||
* @brief Template function that extracts string value
|
||||
* from map entry under specified key
|
||||
*/
|
||||
std::string _StrFromParams(const ParamMap& params, std::string Key) const {
|
||||
auto itrType = params.find(Key);
|
||||
if (itrType == params.end())
|
||||
IE_THROW() << "Parameter key " << Key << " not found";
|
||||
|
@ -20,7 +20,7 @@ namespace Metrics {
|
||||
* @def GPU_METRIC_KEY(name)
|
||||
* @brief shortcut for defining GPU plugin metrics
|
||||
*/
|
||||
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
|
||||
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
|
||||
#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
@ -30,7 +30,8 @@ namespace Metrics {
|
||||
#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name)
|
||||
|
||||
/**
|
||||
* @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, for dGPU - dedicated gpu memory size
|
||||
* @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size,
|
||||
* for dGPU - dedicated gpu memory size
|
||||
*/
|
||||
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
|
||||
|
||||
@ -60,8 +61,8 @@ namespace GPUConfigParams {
|
||||
/**
|
||||
* @brief shortcut for defining configuration keys
|
||||
*/
|
||||
#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name)
|
||||
#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name)
|
||||
#define GPU_CONFIG_KEY(name) InferenceEngine::GPUConfigParams::_CONFIG_KEY(GPU_##name)
|
||||
#define DECLARE_GPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(GPU_##name)
|
||||
#define DECLARE_GPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(GPU_##name)
|
||||
|
||||
/**
|
||||
@ -93,10 +94,11 @@ DECLARE_GPU_CONFIG_KEY(NV12_TWO_INPUTS);
|
||||
DECLARE_GPU_CONFIG_KEY(MAX_NUM_THREADS);
|
||||
|
||||
/**
|
||||
* @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration count.
|
||||
* This key is turned on by default. Turning this key on will achieve better inference performance for loops with not too many iteration counts (less than 16, as a rule of thumb).
|
||||
* Turning this key off will achieve better performance for both graph loading time and inference time with many iteration counts (greater than 16).
|
||||
* Note that turning this key on will increase the graph loading time in proportion to the iteration counts.
|
||||
* @brief Turning on this key enables to unroll recurrent layers such as TensorIterator or Loop with fixed iteration
|
||||
* count. This key is turned on by default. Turning this key on will achieve better inference performance for loops with
|
||||
* not too many iteration counts (less than 16, as a rule of thumb). Turning this key off will achieve better
|
||||
* performance for both graph loading time and inference time with many iteration counts (greater than 16). Note that
|
||||
* turning this key on will increase the graph loading time in proportion to the iteration counts.
|
||||
* Thus, this key should be turned off if graph loading time is considered to be most important target to optimize.*/
|
||||
DECLARE_GPU_CONFIG_KEY(ENABLE_LOOP_UNROLLING);
|
||||
|
||||
|
@ -11,13 +11,13 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "gpu/gpu_context_api_ocl.hpp"
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
namespace gpu {
|
||||
@ -37,12 +37,13 @@ public:
|
||||
|
||||
/**
|
||||
* @brief ID3D11Device conversion operator for the D3DContext object.
|
||||
* @return Pointer to underlying ID3D11Device interface
|
||||
* @return Pointer to underlying ID3D11Device interface
|
||||
*/
|
||||
operator ID3D11Device*() {
|
||||
return _ObjFromParams<ID3D11Device*, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(VA_DEVICE),
|
||||
GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED));
|
||||
GPU_PARAM_KEY(VA_DEVICE),
|
||||
GPU_PARAM_KEY(CONTEXT_TYPE),
|
||||
GPU_PARAM_VALUE(VA_SHARED));
|
||||
}
|
||||
};
|
||||
|
||||
@ -67,12 +68,13 @@ public:
|
||||
|
||||
/**
|
||||
* @brief ID3D11Buffer conversion operator for the D3DContext object.
|
||||
* @return Pointer to underlying ID3D11Buffer interface
|
||||
* @return Pointer to underlying ID3D11Buffer interface
|
||||
*/
|
||||
operator ID3D11Buffer*() {
|
||||
return _ObjFromParams<ID3D11Buffer*, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER));
|
||||
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(DX_BUFFER));
|
||||
}
|
||||
};
|
||||
|
||||
@ -97,12 +99,13 @@ public:
|
||||
|
||||
/**
|
||||
* @brief ID3D11Texture2D conversion operator for the D3DContext object.
|
||||
* @return Pointer to underlying ID3D11Texture2D interface
|
||||
* @return Pointer to underlying ID3D11Texture2D interface
|
||||
*/
|
||||
operator ID3D11Texture2D*() {
|
||||
return _ObjFromParams<ID3D11Texture2D*, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
|
||||
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(VA_SURFACE));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -111,8 +114,9 @@ public:
|
||||
*/
|
||||
uint32_t plane() {
|
||||
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
|
||||
GPU_PARAM_KEY(VA_PLANE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
|
||||
GPU_PARAM_KEY(VA_PLANE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(VA_SURFACE));
|
||||
}
|
||||
};
|
||||
|
||||
@ -125,18 +129,19 @@ public:
|
||||
* @param nv12_surf A ID3D11Texture2D instance to create NV12 blob from
|
||||
* @return NV12 remote blob
|
||||
*/
|
||||
static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, ID3D11Texture2D* nv12_surf) {
|
||||
static inline Blob::Ptr make_shared_blob_nv12(size_t height,
|
||||
size_t width,
|
||||
RemoteContext::Ptr ctx,
|
||||
ID3D11Texture2D* nv12_surf) {
|
||||
// despite of layout, blob dimensions always follow in N,C,H,W order
|
||||
TensorDesc desc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
||||
TensorDesc desc(Precision::U8, {1, 1, height, width}, Layout::NHWC);
|
||||
|
||||
ParamMap blobParams = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
|
||||
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(nv12_surf) },
|
||||
{ GPU_PARAM_KEY(VA_PLANE), uint32_t(0) }
|
||||
};
|
||||
ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
|
||||
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(nv12_surf)},
|
||||
{GPU_PARAM_KEY(VA_PLANE), uint32_t(0)}};
|
||||
Blob::Ptr y_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(desc, blobParams));
|
||||
|
||||
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
||||
TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC);
|
||||
blobParams[GPU_PARAM_KEY(MEM_HANDLE)] = static_cast<gpu_handle_param>(nv12_surf);
|
||||
blobParams[GPU_PARAM_KEY(VA_PLANE)] = uint32_t(1);
|
||||
Blob::Ptr uv_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(uvdesc, blobParams));
|
||||
@ -152,10 +157,12 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot
|
||||
* @return A shared remote context instance
|
||||
*/
|
||||
static inline D3DContext::Ptr make_shared_context(Core& core, std::string deviceName, ID3D11Device* device) {
|
||||
// clang-format off
|
||||
ParamMap contextParams = {
|
||||
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) },
|
||||
{ GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device) }
|
||||
{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)},
|
||||
{GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device)}
|
||||
};
|
||||
// clang-format on
|
||||
return std::dynamic_pointer_cast<D3DContext>(core.CreateContext(deviceName, contextParams));
|
||||
}
|
||||
|
||||
@ -172,10 +179,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
}
|
||||
|
||||
ParamMap params = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER) },
|
||||
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(buffer) }
|
||||
};
|
||||
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(DX_BUFFER)},
|
||||
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(buffer)}};
|
||||
return std::dynamic_pointer_cast<D3DBufferBlob>(casted->CreateBlob(desc, params));
|
||||
}
|
||||
|
||||
@ -188,16 +193,17 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
|
||||
* @return Smart pointer to created RemoteBlob object cast to base class
|
||||
* @note The underlying ID3D11Texture2D can also be a plane of output surface of DXGI video decoder
|
||||
*/
|
||||
static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, ID3D11Texture2D* surface, uint32_t plane = 0) {
|
||||
static inline Blob::Ptr make_shared_blob(const TensorDesc& desc,
|
||||
RemoteContext::Ptr ctx,
|
||||
ID3D11Texture2D* surface,
|
||||
uint32_t plane = 0) {
|
||||
auto casted = std::dynamic_pointer_cast<D3DContext>(ctx);
|
||||
if (nullptr == casted) {
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
}
|
||||
ParamMap params = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
|
||||
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(surface) },
|
||||
{ GPU_PARAM_KEY(VA_PLANE), plane }
|
||||
};
|
||||
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
|
||||
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), static_cast<gpu_handle_param>(surface)},
|
||||
{GPU_PARAM_KEY(VA_PLANE), plane}};
|
||||
return std::dynamic_pointer_cast<D3DSurface2DBlob>(casted->CreateBlob(desc, params));
|
||||
}
|
||||
|
||||
|
@ -13,13 +13,12 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ie_compound_blob.h"
|
||||
#include "ie_remote_context.hpp"
|
||||
#include "ie_core.hpp"
|
||||
|
||||
#include "gpu/gpu_params.hpp"
|
||||
#include "gpu/gpu_ocl_wrapper.hpp"
|
||||
#include "gpu/details/gpu_context_helpers.hpp"
|
||||
#include "gpu/gpu_ocl_wrapper.hpp"
|
||||
#include "gpu/gpu_params.hpp"
|
||||
#include "ie_compound_blob.h"
|
||||
#include "ie_core.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -42,8 +41,11 @@ public:
|
||||
* @return `cl_context`
|
||||
*/
|
||||
cl_context get() {
|
||||
return _ObjFromParams<cl_context, gpu_handle_param>(getParams(), GPU_PARAM_KEY(OCL_CONTEXT),
|
||||
GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL), GPU_PARAM_VALUE(VA_SHARED));
|
||||
return _ObjFromParams<cl_context, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(OCL_CONTEXT),
|
||||
GPU_PARAM_KEY(CONTEXT_TYPE),
|
||||
GPU_PARAM_VALUE(OCL),
|
||||
GPU_PARAM_VALUE(VA_SHARED));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -105,8 +107,11 @@ public:
|
||||
* @return underlying OpenCL memory object handle
|
||||
*/
|
||||
cl_mem get() {
|
||||
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER), GPU_PARAM_VALUE(DX_BUFFER));
|
||||
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(MEM_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(OCL_BUFFER),
|
||||
GPU_PARAM_VALUE(DX_BUFFER));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -150,8 +155,11 @@ public:
|
||||
* @return `cl_mem`
|
||||
*/
|
||||
cl_mem get() {
|
||||
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(), GPU_PARAM_KEY(MEM_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D), GPU_PARAM_VALUE(VA_SURFACE));
|
||||
return _ObjFromParams<cl_mem, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(MEM_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(OCL_IMAGE2D),
|
||||
GPU_PARAM_VALUE(VA_SURFACE));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -179,7 +187,9 @@ public:
|
||||
* @param nv12_image_plane_uv cl::Image2D object containing UV plane data.
|
||||
* @return A shared remote blob instance
|
||||
*/
|
||||
static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2D& nv12_image_plane_y, cl::Image2D& nv12_image_plane_uv) {
|
||||
static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx,
|
||||
cl::Image2D& nv12_image_plane_y,
|
||||
cl::Image2D& nv12_image_plane_uv) {
|
||||
auto casted = std::dynamic_pointer_cast<ClContext>(ctx);
|
||||
if (nullptr == casted) {
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
@ -189,15 +199,13 @@ static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2
|
||||
size_t height = nv12_image_plane_y.getImageInfo<CL_IMAGE_HEIGHT>();
|
||||
|
||||
// despite of layout, blob dimensions always follow in N,C,H,W order
|
||||
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
||||
TensorDesc ydesc(Precision::U8, {1, 1, height, width}, Layout::NHWC);
|
||||
|
||||
ParamMap blobParams = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) },
|
||||
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(nv12_image_plane_y.get()) }
|
||||
};
|
||||
ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D)},
|
||||
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(nv12_image_plane_y.get())}};
|
||||
Blob::Ptr y_blob = std::dynamic_pointer_cast<Blob>(casted->CreateBlob(ydesc, blobParams));
|
||||
|
||||
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
||||
TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC);
|
||||
blobParams[GPU_PARAM_KEY(MEM_HANDLE)] = static_cast<gpu_handle_param>(nv12_image_plane_uv.get());
|
||||
Blob::Ptr uv_blob = std::dynamic_pointer_cast<Blob>(casted->CreateBlob(uvdesc, blobParams));
|
||||
|
||||
@ -213,10 +221,8 @@ static inline Blob::Ptr make_shared_blob_nv12(RemoteContext::Ptr ctx, cl::Image2
|
||||
* @return A shared remote context instance
|
||||
*/
|
||||
static inline RemoteContext::Ptr make_shared_context(Core& core, std::string deviceName, cl_context ctx) {
|
||||
ParamMap contextParams = {
|
||||
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL) },
|
||||
{ GPU_PARAM_KEY(OCL_CONTEXT), static_cast<gpu_handle_param>(ctx) }
|
||||
};
|
||||
ParamMap contextParams = {{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(OCL)},
|
||||
{GPU_PARAM_KEY(OCL_CONTEXT), static_cast<gpu_handle_param>(ctx)}};
|
||||
return core.CreateContext(deviceName, contextParams);
|
||||
}
|
||||
|
||||
@ -243,10 +249,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
}
|
||||
|
||||
ParamMap params = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
|
||||
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer.get()) }
|
||||
};
|
||||
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER)},
|
||||
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer.get())}};
|
||||
return std::dynamic_pointer_cast<Blob>(casted->CreateBlob(desc, params));
|
||||
}
|
||||
|
||||
@ -263,10 +267,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
}
|
||||
|
||||
ParamMap params = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER) },
|
||||
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer) }
|
||||
};
|
||||
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_BUFFER)},
|
||||
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(buffer)}};
|
||||
return std::dynamic_pointer_cast<Blob>(casted->CreateBlob(desc, params));
|
||||
}
|
||||
|
||||
@ -283,10 +285,8 @@ static inline Blob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
}
|
||||
|
||||
ParamMap params = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D) },
|
||||
{ GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(image.get()) }
|
||||
};
|
||||
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(OCL_IMAGE2D)},
|
||||
{GPU_PARAM_KEY(MEM_HANDLE), static_cast<gpu_handle_param>(image.get())}};
|
||||
return std::dynamic_pointer_cast<Blob>(casted->CreateBlob(desc, params));
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,9 @@
|
||||
|
||||
#include "gpu/gpu_context_api_ocl.hpp"
|
||||
|
||||
// clang-format off
|
||||
#include <va/va.h>
|
||||
// clang-format on
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -41,8 +43,9 @@ public:
|
||||
*/
|
||||
operator VADisplay() {
|
||||
return _ObjFromParams<VADisplay, gpu_handle_param>(getParams(),
|
||||
GPU_PARAM_KEY(VA_DEVICE),
|
||||
GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED));
|
||||
GPU_PARAM_KEY(VA_DEVICE),
|
||||
GPU_PARAM_KEY(CONTEXT_TYPE),
|
||||
GPU_PARAM_VALUE(VA_SHARED));
|
||||
}
|
||||
};
|
||||
|
||||
@ -71,8 +74,9 @@ public:
|
||||
*/
|
||||
operator VASurfaceID() {
|
||||
return _ObjFromParams<VASurfaceID, uint32_t>(getParams(),
|
||||
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
|
||||
GPU_PARAM_KEY(DEV_OBJECT_HANDLE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(VA_SURFACE));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -81,8 +85,9 @@ public:
|
||||
*/
|
||||
uint32_t plane() {
|
||||
return _ObjFromParams<uint32_t, uint32_t>(getParams(),
|
||||
GPU_PARAM_KEY(VA_PLANE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE));
|
||||
GPU_PARAM_KEY(VA_PLANE),
|
||||
GPU_PARAM_KEY(SHARED_MEM_TYPE),
|
||||
GPU_PARAM_VALUE(VA_SURFACE));
|
||||
}
|
||||
};
|
||||
|
||||
@ -95,17 +100,18 @@ public:
|
||||
* @param nv12_surf NV12 `VASurfaceID` to create NV12 from
|
||||
* @return A remote NV12 blob wrapping `VASurfaceID`
|
||||
*/
|
||||
static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, RemoteContext::Ptr ctx, VASurfaceID nv12_surf) {
|
||||
static inline Blob::Ptr make_shared_blob_nv12(size_t height,
|
||||
size_t width,
|
||||
RemoteContext::Ptr ctx,
|
||||
VASurfaceID nv12_surf) {
|
||||
// despite of layout, blob dimensions always follow in N, C, H, W order
|
||||
TensorDesc ydesc(Precision::U8, { 1, 1, height, width }, Layout::NHWC);
|
||||
ParamMap blobParams = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
|
||||
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), nv12_surf },
|
||||
{ GPU_PARAM_KEY(VA_PLANE), uint32_t(0) }
|
||||
};
|
||||
TensorDesc ydesc(Precision::U8, {1, 1, height, width}, Layout::NHWC);
|
||||
ParamMap blobParams = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
|
||||
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), nv12_surf},
|
||||
{GPU_PARAM_KEY(VA_PLANE), uint32_t(0)}};
|
||||
Blob::Ptr y_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(ydesc, blobParams));
|
||||
|
||||
TensorDesc uvdesc(Precision::U8, { 1, 2, height / 2, width / 2 }, Layout::NHWC);
|
||||
TensorDesc uvdesc(Precision::U8, {1, 2, height / 2, width / 2}, Layout::NHWC);
|
||||
blobParams[GPU_PARAM_KEY(VA_PLANE)] = uint32_t(1);
|
||||
Blob::Ptr uv_blob = std::dynamic_pointer_cast<Blob>(ctx->CreateBlob(uvdesc, blobParams));
|
||||
|
||||
@ -120,10 +126,8 @@ static inline Blob::Ptr make_shared_blob_nv12(size_t height, size_t width, Remot
|
||||
* @return A remote context wrapping `VADisplay`
|
||||
*/
|
||||
static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceName, VADisplay device) {
|
||||
ParamMap contextParams = {
|
||||
{ GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED) },
|
||||
{ GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device) }
|
||||
};
|
||||
ParamMap contextParams = {{GPU_PARAM_KEY(CONTEXT_TYPE), GPU_PARAM_VALUE(VA_SHARED)},
|
||||
{GPU_PARAM_KEY(VA_DEVICE), static_cast<gpu_handle_param>(device)}};
|
||||
return std::dynamic_pointer_cast<VAContext>(core.CreateContext(deviceName, contextParams));
|
||||
}
|
||||
|
||||
@ -135,16 +139,17 @@ static inline VAContext::Ptr make_shared_context(Core& core, std::string deviceN
|
||||
* @param plane An index of a plane inside `VASurfaceID` to create blob from
|
||||
* @return A remote blob wrapping `VASurfaceID`
|
||||
*/
|
||||
static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::Ptr ctx, VASurfaceID surface, uint32_t plane = 0) {
|
||||
static inline VASurfaceBlob::Ptr make_shared_blob(const TensorDesc& desc,
|
||||
RemoteContext::Ptr ctx,
|
||||
VASurfaceID surface,
|
||||
uint32_t plane = 0) {
|
||||
auto casted = std::dynamic_pointer_cast<VAContext>(ctx);
|
||||
if (nullptr == casted) {
|
||||
IE_THROW() << "Invalid remote context passed";
|
||||
}
|
||||
ParamMap params = {
|
||||
{ GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE) },
|
||||
{ GPU_PARAM_KEY(DEV_OBJECT_HANDLE), surface },
|
||||
{ GPU_PARAM_KEY(VA_PLANE), plane }
|
||||
};
|
||||
ParamMap params = {{GPU_PARAM_KEY(SHARED_MEM_TYPE), GPU_PARAM_VALUE(VA_SURFACE)},
|
||||
{GPU_PARAM_KEY(DEV_OBJECT_HANDLE), surface},
|
||||
{GPU_PARAM_KEY(VA_PLANE), plane}};
|
||||
return std::dynamic_pointer_cast<VASurfaceBlob>(casted->CreateBlob(desc, params));
|
||||
}
|
||||
|
||||
|
@ -15,32 +15,32 @@
|
||||
*/
|
||||
|
||||
#ifndef CL_HPP_ENABLE_EXCEPTIONS
|
||||
# define CL_HPP_ENABLE_EXCEPTIONS
|
||||
# define CL_HPP_ENABLE_EXCEPTIONS
|
||||
#endif
|
||||
|
||||
#ifdef CL_HPP_MINIMUM_OPENCL_VERSION
|
||||
# if CL_HPP_MINIMUM_OPENCL_VERSION < 120
|
||||
# error "CL_HPP_MINIMUM_OPENCL_VERSION must be >= 120"
|
||||
# endif
|
||||
# if CL_HPP_MINIMUM_OPENCL_VERSION < 120
|
||||
# error "CL_HPP_MINIMUM_OPENCL_VERSION must be >= 120"
|
||||
# endif
|
||||
#else
|
||||
# define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
# define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#endif
|
||||
|
||||
#ifdef CL_HPP_TARGET_OPENCL_VERSION
|
||||
# if CL_HPP_TARGET_OPENCL_VERSION < 120
|
||||
# error "CL_HPP_TARGET_OPENCL_VERSION must be >= 120"
|
||||
# endif
|
||||
# if CL_HPP_TARGET_OPENCL_VERSION < 120
|
||||
# error "CL_HPP_TARGET_OPENCL_VERSION must be >= 120"
|
||||
# endif
|
||||
#else
|
||||
# define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
# define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC system_header
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC system_header
|
||||
#endif
|
||||
|
||||
#include <CL/cl2.hpp>
|
||||
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic pop
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
@ -41,8 +41,7 @@ namespace GPUContextParams {
|
||||
* @def DECLARE_GPU_PARAM_KEY(name, ...)
|
||||
* @brief Shortcut for defining object parameter keys
|
||||
*/
|
||||
#define DECLARE_GPU_PARAM_KEY(name, ...) \
|
||||
static constexpr auto PARAM_##name = #name
|
||||
#define DECLARE_GPU_PARAM_KEY(name, ...) static constexpr auto PARAM_##name = #name
|
||||
/**
|
||||
* @brief Shared device context type: can be either pure OpenCL (OCL)
|
||||
* or shared video decoder (VA_SHARED) context
|
||||
|
@ -24,7 +24,7 @@ namespace HeteroConfigParams {
|
||||
* @def HETERO_CONFIG_KEY(name)
|
||||
* @brief Shortcut for defining HETERO configuration keys
|
||||
*/
|
||||
#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name)
|
||||
#define HETERO_CONFIG_KEY(name) InferenceEngine::HeteroConfigParams::_CONFIG_KEY(HETERO_##name)
|
||||
#define DECLARE_HETERO_CONFIG_KEY(name) DECLARE_CONFIG_KEY(HETERO_##name)
|
||||
|
||||
/**
|
||||
|
@ -9,9 +9,10 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "ie_api.h"
|
||||
#include <memory>
|
||||
|
||||
#include "ie_api.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
/**
|
||||
@ -19,7 +20,7 @@ namespace InferenceEngine {
|
||||
*/
|
||||
enum LockOp {
|
||||
LOCK_FOR_READ = 0, //!< A flag to lock data for read
|
||||
LOCK_FOR_WRITE //!< A flag to lock data for write
|
||||
LOCK_FOR_WRITE //!< A flag to lock data for write
|
||||
};
|
||||
|
||||
/**
|
||||
@ -60,7 +61,7 @@ public:
|
||||
virtual bool free(void* handle) noexcept = 0;
|
||||
|
||||
protected:
|
||||
~IAllocator() = default;
|
||||
~IAllocator() = default;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -10,101 +10,101 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(USE_STATIC_IE) || (defined(__GNUC__) && (__GNUC__ < 4))
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
|
||||
#else
|
||||
# if defined(_WIN32)
|
||||
# define INFERENCE_ENGINE_CDECL
|
||||
# ifdef IMPLEMENT_INFERENCE_ENGINE_API
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllexport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllexport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllexport) __VA_ARGS__
|
||||
# else
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllimport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllimport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllimport) __VA_ARGS__
|
||||
# endif
|
||||
# else
|
||||
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# endif
|
||||
# if defined(_WIN32)
|
||||
# define INFERENCE_ENGINE_CDECL
|
||||
# ifdef IMPLEMENT_INFERENCE_ENGINE_API
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllexport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllexport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllexport) __VA_ARGS__
|
||||
# else
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __declspec(dllimport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __declspec(dllimport) __VA_ARGS__ __cdecl
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __declspec(dllimport) __VA_ARGS__
|
||||
# endif
|
||||
# else
|
||||
# define INFERENCE_ENGINE_CDECL __attribute__((cdecl))
|
||||
# define INFERENCE_ENGINE_API(...) extern "C" __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CPP(...) __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# define INFERENCE_ENGINE_API_CLASS(...) __attribute__((visibility("default"))) __VA_ARGS__
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg) __declspec(deprecated(msg))
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg) __declspec(deprecated(msg))
|
||||
#elif defined __INTEL_COMPILER
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
||||
#elif defined(__GNUC__)
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated((msg))))
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg) __attribute__((deprecated((msg))))
|
||||
#else
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg)
|
||||
# define INFERENCE_ENGINE_DEPRECATED(msg)
|
||||
#endif
|
||||
|
||||
#if defined IMPLEMENT_INFERENCE_ENGINE_API || defined IMPLEMENT_INFERENCE_ENGINE_PLUGIN
|
||||
# define INFERENCE_ENGINE_INTERNAL(msg)
|
||||
# define INFERENCE_ENGINE_INTERNAL(msg)
|
||||
#else
|
||||
# define INFERENCE_ENGINE_INTERNAL(msg) INFERENCE_ENGINE_DEPRECATED(msg)
|
||||
# define INFERENCE_ENGINE_INTERNAL(msg) INFERENCE_ENGINE_DEPRECATED(msg)
|
||||
#endif
|
||||
|
||||
// Suppress warning "-Wdeprecated-declarations" / C4996
|
||||
#if defined(_MSC_VER)
|
||||
# define IE_DO_PRAGMA(x) __pragma(x)
|
||||
# define IE_DO_PRAGMA(x) __pragma(x)
|
||||
#elif defined(__GNUC__)
|
||||
# define IE_DO_PRAGMA(x) _Pragma(#x)
|
||||
# define IE_DO_PRAGMA(x) _Pragma(# x)
|
||||
#else
|
||||
# define IE_DO_PRAGMA(x)
|
||||
# define IE_DO_PRAGMA(x)
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
# define IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_DO_PRAGMA(warning(push)) \
|
||||
IE_DO_PRAGMA(warning(disable : 4996))
|
||||
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
|
||||
# define IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_DO_PRAGMA(warning(push)) \
|
||||
IE_DO_PRAGMA(warning(disable : 4996))
|
||||
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
# define IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_DO_PRAGMA(warning(push)) \
|
||||
IE_DO_PRAGMA(warning(disable : 1478))
|
||||
IE_DO_PRAGMA(warning(disable : 1786))
|
||||
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
|
||||
# define IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_DO_PRAGMA(warning(push)) \
|
||||
IE_DO_PRAGMA(warning(disable : 1478))
|
||||
IE_DO_PRAGMA(warning(disable : 1786))
|
||||
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop))
|
||||
#elif defined(__clang__) || ((__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ > 405))
|
||||
# define IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_DO_PRAGMA(GCC diagnostic push) \
|
||||
IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
|
||||
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop)
|
||||
# define IE_SUPPRESS_DEPRECATED_START \
|
||||
IE_DO_PRAGMA(GCC diagnostic push) \
|
||||
IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
|
||||
# define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop)
|
||||
#else
|
||||
# define IE_SUPPRESS_DEPRECATED_START
|
||||
# define IE_SUPPRESS_DEPRECATED_END
|
||||
# define IE_SUPPRESS_DEPRECATED_START
|
||||
# define IE_SUPPRESS_DEPRECATED_END
|
||||
#endif
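For illustration only (not part of this diff), a minimal sketch of how the suppression macros above are typically wrapped around a call to a deprecated API; the call itself is hypothetical:

IE_SUPPRESS_DEPRECATED_START
someObject.deprecatedCall();  // hypothetical deprecated call; no -Wdeprecated-declarations / C4996 warning is emitted here
IE_SUPPRESS_DEPRECATED_END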
|
||||
|
||||
#ifdef _WIN32
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_MSVC IE_SUPPRESS_DEPRECATED_START
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_MSVC IE_SUPPRESS_DEPRECATED_END
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_MSVC IE_SUPPRESS_DEPRECATED_START
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_MSVC IE_SUPPRESS_DEPRECATED_END
|
||||
#else
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_MSVC
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_MSVC
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_MSVC
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_MSVC
|
||||
#endif
|
||||
|
||||
#if defined __GNUC__ && (__GNUC__ <= 4 || (__GNUC__ == 5 && __GNUC_MINOR__ <= 5) || \
|
||||
(defined __i386__ || defined __arm__ || defined __aarch64__))
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_GCC IE_SUPPRESS_DEPRECATED_START
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_GCC IE_SUPPRESS_DEPRECATED_END
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_GCC IE_SUPPRESS_DEPRECATED_START
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_GCC IE_SUPPRESS_DEPRECATED_END
|
||||
#else
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_GCC
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_GCC
|
||||
# define _IE_SUPPRESS_DEPRECATED_START_GCC
|
||||
# define _IE_SUPPRESS_DEPRECATED_END_GCC
|
||||
#endif
|
||||
|
||||
#ifndef ENABLE_UNICODE_PATH_SUPPORT
|
||||
# ifdef _WIN32
|
||||
# if defined __INTEL_COMPILER || defined _MSC_VER
|
||||
# define ENABLE_UNICODE_PATH_SUPPORT
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__)
|
||||
# define ENABLE_UNICODE_PATH_SUPPORT
|
||||
# endif
|
||||
# ifdef _WIN32
|
||||
# if defined __INTEL_COMPILER || defined _MSC_VER
|
||||
# define ENABLE_UNICODE_PATH_SUPPORT
|
||||
# endif
|
||||
# elif defined(__GNUC__) && (__GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ > 2)) || defined(__clang__)
|
||||
# define ENABLE_UNICODE_PATH_SUPPORT
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
@ -114,17 +114,17 @@
|
||||
*/
|
||||
|
||||
#if defined(_WIN32)
|
||||
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" __declspec(dllexport) type
|
||||
# else
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" type
|
||||
# endif
|
||||
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" __declspec(dllexport) type
|
||||
# else
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" type
|
||||
# endif
|
||||
#elif (__GNUC__ >= 4) // NOLINT
|
||||
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" __attribute__((visibility("default"))) type
|
||||
# else
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" type
|
||||
# endif
|
||||
# ifdef IMPLEMENT_INFERENCE_ENGINE_PLUGIN
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" __attribute__((visibility("default"))) type
|
||||
# else
|
||||
# define INFERENCE_PLUGIN_API(type) extern "C" type
|
||||
# endif
|
||||
#else
|
||||
# define INFERENCE_PLUGIN_API(TYPE) extern "C" TYPE
|
||||
# define INFERENCE_PLUGIN_API(TYPE) extern "C" TYPE
|
||||
#endif
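As a hedged sketch (the function name and signature are illustrative, not taken from this commit), INFERENCE_PLUGIN_API decorates a plugin's exported entry point so it becomes dllexport on Windows and default-visibility elsewhere:

// Hypothetical exported plugin factory; the real entry point may differ.
INFERENCE_PLUGIN_API(InferenceEngine::StatusCode)
CreateMyPluginEngine(InferenceEngine::IInferencePlugin*& plugin, InferenceEngine::ResponseDesc* resp) noexcept;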
|
||||
|
@ -19,13 +19,13 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "details/ie_blob_iterator.hpp"
|
||||
#include "details/ie_pre_allocator.hpp"
|
||||
#include "ie_allocator.hpp"
|
||||
#include "ie_common.h"
|
||||
#include "ie_layouts.h"
|
||||
#include "ie_locked_memory.hpp"
|
||||
#include "ie_precision.hpp"
|
||||
#include "details/ie_blob_iterator.hpp"
|
||||
#include "details/ie_pre_allocator.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -120,7 +120,7 @@ public:
|
||||
*
|
||||
* @param tensorDesc Defines the layout and dims of the blob
|
||||
*/
|
||||
explicit Blob(const TensorDesc& tensorDesc): tensorDesc(tensorDesc) {}
|
||||
explicit Blob(const TensorDesc& tensorDesc) : tensorDesc(tensorDesc) {}
|
||||
|
||||
/**
|
||||
* @brief Returns the tensor description
|
||||
@ -146,7 +146,8 @@ public:
|
||||
* @return The total number of elements
|
||||
*/
|
||||
virtual size_t size() const noexcept {
|
||||
if (tensorDesc.getLayout() == Layout::SCALAR) return 1;
|
||||
if (tensorDesc.getLayout() == Layout::SCALAR)
|
||||
return 1;
|
||||
return product(tensorDesc.getDims());
|
||||
}
|
||||
|
||||
@ -233,7 +234,8 @@ protected:
|
||||
* @return Result of multiplication
|
||||
*/
|
||||
static size_t product(const SizeVector& dims) noexcept {
|
||||
if (dims.empty()) return 0;
|
||||
if (dims.empty())
|
||||
return 0;
|
||||
return std::accumulate(std::begin(dims), std::end(dims), (size_t)1, std::multiplies<size_t>());
|
||||
}
|
||||
|
||||
@ -278,7 +280,7 @@ std::shared_ptr<const T> as(const Blob::CPtr& blob) noexcept {
|
||||
* @note Any Blob implementation that represents a concept of a tensor in memory (for example,
|
||||
* TBlob) must be a subclass of MemoryBlob instead of Blob
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(MemoryBlob): public Blob {
|
||||
class INFERENCE_ENGINE_API_CLASS(MemoryBlob) : public Blob {
|
||||
public:
|
||||
/**
|
||||
* @brief A smart pointer to the MemoryBlob object
|
||||
@ -300,7 +302,7 @@ public:
|
||||
*
|
||||
* @param tensorDesc Defines the layout and dims of the blob
|
||||
*/
|
||||
explicit MemoryBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {}
|
||||
explicit MemoryBlob(const TensorDesc& tensorDesc) : Blob(tensorDesc) {}
|
||||
|
||||
/**
|
||||
* @brief Returns the tensor description
|
||||
@ -323,7 +325,8 @@ public:
|
||||
* @return The total number of elements
|
||||
*/
|
||||
size_t size() const noexcept override {
|
||||
if (tensorDesc.getLayout() == Layout::SCALAR) return 1;
|
||||
if (tensorDesc.getLayout() == Layout::SCALAR)
|
||||
return 1;
|
||||
return product(tensorDesc.getDims());
|
||||
}
|
||||
|
||||
@ -493,7 +496,7 @@ public:
|
||||
*
|
||||
* @param tensorDesc Tensor description
|
||||
*/
|
||||
explicit TBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {}
|
||||
explicit TBlob(const TensorDesc& tensorDesc) : MemoryBlob(tensorDesc) {}
|
||||
|
||||
/**
|
||||
* @brief The constructor creates a TBlob object with the specified dimensions and layout
|
||||
@ -506,7 +509,7 @@ public:
|
||||
* @param data_size Length of the pre-allocated array. If not set, size is assumed equal
|
||||
* to the dot product of dims.
|
||||
*/
|
||||
TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0): MemoryBlob(tensorDesc) {
|
||||
TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0) : MemoryBlob(tensorDesc) {
|
||||
if (data_size == 0) {
|
||||
data_size = size();
|
||||
}
|
||||
@ -528,8 +531,10 @@ public:
|
||||
* @param alloc An allocator
|
||||
*/
|
||||
TBlob(const TensorDesc& tensorDesc, const std::shared_ptr<IAllocator>& alloc)
|
||||
: MemoryBlob(tensorDesc), _allocator(alloc) {
|
||||
if (_allocator == nullptr) IE_THROW() << "TBlob allocator was not initialized.";
|
||||
: MemoryBlob(tensorDesc),
|
||||
_allocator(alloc) {
|
||||
if (_allocator == nullptr)
|
||||
IE_THROW() << "TBlob allocator was not initialized.";
|
||||
}
|
||||
|
||||
/**
|
||||
@ -537,7 +542,7 @@ public:
|
||||
*
|
||||
* @param blob Source blob
|
||||
*/
|
||||
TBlob(const TBlob<T>& blob): MemoryBlob(blob.getTensorDesc()) {
|
||||
TBlob(const TBlob<T>& blob) : MemoryBlob(blob.getTensorDesc()) {
|
||||
copyFrom(blob);
|
||||
}
|
||||
|
||||
@ -546,7 +551,7 @@ public:
|
||||
*
|
||||
* @param blob rvalue to make a move from
|
||||
*/
|
||||
TBlob(TBlob<T>&& blob): MemoryBlob(blob.getTensorDesc()) {
|
||||
TBlob(TBlob<T>&& blob) : MemoryBlob(blob.getTensorDesc()) {
|
||||
moveFrom(blob);
|
||||
}
|
||||
|
||||
@ -592,11 +597,9 @@ public:
|
||||
return;
|
||||
}
|
||||
|
||||
_handle.reset(
|
||||
rawHandle,
|
||||
[allocator](void* rawHandle) {
|
||||
allocator->free(rawHandle);
|
||||
});
|
||||
_handle.reset(rawHandle, [allocator](void* rawHandle) {
|
||||
allocator->free(rawHandle);
|
||||
});
|
||||
}
|
||||
|
||||
bool deallocate() noexcept override {
|
||||
@ -611,14 +614,14 @@ public:
|
||||
return std::move(lockme<const void>());
|
||||
}
|
||||
|
||||
LockedMemory<void> rwmap()noexcept override {
|
||||
LockedMemory<void> rwmap() noexcept override {
|
||||
return std::move(lockme<void>());
|
||||
}
|
||||
|
||||
LockedMemory<const void> rmap() const noexcept override {
|
||||
return std::move(lockme<const void>());
|
||||
}
|
||||
LockedMemory<void> wmap()noexcept override {
|
||||
LockedMemory<void> wmap() noexcept override {
|
||||
return std::move(lockme<void>());
|
||||
}
|
||||
|
||||
@ -725,7 +728,7 @@ protected:
|
||||
template <class S>
|
||||
LockedMemory<S> lockme() const {
|
||||
return LockedMemory<S>(_allocator.get(), getHandle(), 0);
|
||||
// getTensorDesc().getBlockingDesc().getOffsetPadding());
|
||||
// getTensorDesc().getBlockingDesc().getOffsetPadding());
|
||||
}
|
||||
|
||||
const std::shared_ptr<IAllocator>& getAllocator() const noexcept override {
|
||||
@ -746,11 +749,10 @@ protected:
|
||||
* @param origBlob An original blob
|
||||
* @param roi A ROI object
|
||||
*/
|
||||
TBlob(const TBlob& origBlob, const ROI& roi) :
|
||||
MemoryBlob(make_roi_desc(origBlob.getTensorDesc(), roi, true)),
|
||||
_allocator(origBlob._allocator) {
|
||||
IE_ASSERT(origBlob._handle != nullptr)
|
||||
<< "Original Blob must be allocated before ROI creation";
|
||||
TBlob(const TBlob& origBlob, const ROI& roi)
|
||||
: MemoryBlob(make_roi_desc(origBlob.getTensorDesc(), roi, true)),
|
||||
_allocator(origBlob._allocator) {
|
||||
IE_ASSERT(origBlob._handle != nullptr) << "Original Blob must be allocated before ROI creation";
|
||||
|
||||
_handle = origBlob._handle;
|
||||
}
|
||||
@ -784,7 +786,7 @@ template <typename Type>
|
||||
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc) {
|
||||
if (!tensorDesc.getPrecision().hasStorageType<Type>())
|
||||
IE_THROW() << "Cannot make shared blob! "
|
||||
<< "The blob type cannot be used to store objects of current precision";
|
||||
<< "The blob type cannot be used to store objects of current precision";
|
||||
return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc);
|
||||
}
|
||||
|
||||
@ -798,11 +800,12 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorD
|
||||
* @return A shared pointer to the newly created blob of the given type
|
||||
*/
|
||||
template <typename Type>
|
||||
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc, Type* ptr,
|
||||
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc,
|
||||
Type* ptr,
|
||||
size_t size = 0) {
|
||||
if (!tensorDesc.getPrecision().hasStorageType<Type>())
|
||||
IE_THROW() << "Cannot make shared blob! "
|
||||
<< "The blob type cannot be used to store objects of current precision";
|
||||
<< "The blob type cannot be used to store objects of current precision";
|
||||
return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc, ptr, size);
|
||||
}
|
||||
|
||||
@ -816,10 +819,11 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorD
|
||||
*/
|
||||
template <typename Type>
|
||||
inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(
|
||||
const TensorDesc& tensorDesc, const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
|
||||
const TensorDesc& tensorDesc,
|
||||
const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
|
||||
if (!tensorDesc.getPrecision().hasStorageType<Type>())
|
||||
IE_THROW() << "Cannot make shared blob! "
|
||||
<< "The blob type cannot be used to store objects of current precision";
|
||||
<< "The blob type cannot be used to store objects of current precision";
|
||||
return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc, alloc);
|
||||
}
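A short usage sketch for the make_shared_blob helpers and the rwmap() accessor touched above (assumed typical usage, not part of this change):

InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, {1, 3, 224, 224}, InferenceEngine::Layout::NCHW);
auto blob = InferenceEngine::make_shared_blob<float>(desc);
blob->allocate();
auto locked = blob->rwmap();      // LockedMemory<void>; the region stays locked while 'locked' is alive
float* data = locked.as<float*>();
data[0] = 1.0f;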
|
||||
|
||||
|
@ -11,18 +11,19 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_api.h"
|
||||
|
||||
#include <ie_api.h>
|
||||
#ifndef NDEBUG
|
||||
#include <cassert>
|
||||
# include <cassert>
|
||||
#endif
|
||||
namespace InferenceEngine {
|
||||
/**
|
||||
@ -58,9 +59,9 @@ using DataWeakPtr = std::weak_ptr<Data>;
|
||||
* @brief The method holds the user values to enable binding of data per graph node.
|
||||
*/
|
||||
union UserValue {
|
||||
int v_int; //!< An integer value
|
||||
int v_int; //!< An integer value
|
||||
float v_float; //!< A floating point value
|
||||
void* v_ptr; //!< A pointer to a void
|
||||
void* v_ptr; //!< A pointer to a void
|
||||
};
|
||||
|
||||
/**
|
||||
@ -71,15 +72,15 @@ enum Layout : uint8_t {
|
||||
ANY = 0, //!< "any" layout
|
||||
|
||||
// I/O data layouts
|
||||
NCHW = 1, //!< NCHW layout for input / output blobs
|
||||
NHWC = 2, //!< NHWC layout for input / output blobs
|
||||
NCHW = 1, //!< NCHW layout for input / output blobs
|
||||
NHWC = 2, //!< NHWC layout for input / output blobs
|
||||
NCDHW = 3, //!< NCDHW layout for input / output blobs
|
||||
NDHWC = 4, //!< NDHWC layout for input / output blobs
|
||||
|
||||
// weight layouts
|
||||
OIHW = 64, //!< NDHWC layout for operation weights
|
||||
GOIHW = 65, //!< NDHWC layout for operation weights
|
||||
OIDHW = 66, //!< NDHWC layout for operation weights
|
||||
OIHW = 64, //!< NDHWC layout for operation weights
|
||||
GOIHW = 65, //!< NDHWC layout for operation weights
|
||||
OIDHW = 66, //!< NDHWC layout for operation weights
|
||||
GOIDHW = 67, //!< NDHWC layout for operation weights
|
||||
|
||||
// Scalar
|
||||
@ -189,9 +190,9 @@ struct InferenceEngineProfileInfo {
|
||||
* @brief Defines the general status of the layer
|
||||
*/
|
||||
enum LayerStatus {
|
||||
NOT_RUN, //!< A layer is not executed
|
||||
NOT_RUN, //!< A layer is not executed
|
||||
OPTIMIZED_OUT, //!< A layer is optimized out during graph optimization phase
|
||||
EXECUTED //!< A layer is executed
|
||||
EXECUTED //!< A layer is executed
|
||||
};
|
||||
|
||||
/**
|
||||
@ -292,10 +293,12 @@ using ConstOutputsDataMap = std::map<std::string, CDataPtr>;
|
||||
using OutputsDataMap = std::map<std::string, DataPtr>;
|
||||
|
||||
namespace details {
|
||||
struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception")
|
||||
INFERENCE_ENGINE_API_CLASS(InferenceEngineException) : public std::runtime_error {
|
||||
struct INFERENCE_ENGINE_DEPRECATED("Use InferRequest::Exception") INFERENCE_ENGINE_API_CLASS(InferenceEngineException)
|
||||
: public std::runtime_error {
|
||||
using std::runtime_error::runtime_error;
|
||||
bool hasStatus() const {return true;}
|
||||
bool hasStatus() const {
|
||||
return true;
|
||||
}
|
||||
StatusCode getStatus() const;
|
||||
};
|
||||
} // namespace details
|
||||
@ -311,18 +314,22 @@ IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
/// @cond
|
||||
namespace details {
|
||||
template<typename ExceptionType> struct ExceptionTraits;
|
||||
template <typename ExceptionType>
|
||||
struct ExceptionTraits;
|
||||
}
|
||||
|
||||
#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \
|
||||
struct INFERENCE_ENGINE_API_CLASS(ExceptionType) final : public InferenceEngine::Exception { \
|
||||
using Exception::Exception; \
|
||||
}; \
|
||||
namespace details { \
|
||||
template<> struct ExceptionTraits<ExceptionType> { \
|
||||
static const char* string() {return "[ " #statusCode " ]";} \
|
||||
}; \
|
||||
}
|
||||
#define INFERENCE_ENGINE_DECLARE_EXCEPTION(ExceptionType, statusCode) \
|
||||
struct INFERENCE_ENGINE_API_CLASS(ExceptionType) final : public InferenceEngine::Exception { \
|
||||
using Exception::Exception; \
|
||||
}; \
|
||||
namespace details { \
|
||||
template <> \
|
||||
struct ExceptionTraits<ExceptionType> { \
|
||||
static const char* string() { \
|
||||
return "[ " #statusCode " ]"; \
|
||||
} \
|
||||
}; \
|
||||
}
|
||||
/// @endcond
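For reference, the macro above is what declares the concrete exception types that follow; a representative expansion site looks like:

// Declares InferenceEngine::GeneralError mapped to the "[ GENERAL_ERROR ]" message prefix.
INFERENCE_ENGINE_DECLARE_EXCEPTION(GeneralError, GENERAL_ERROR)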
|
||||
|
||||
/** @brief This class represents StatusCode::GENERAL_ERROR exception */
|
||||
@ -380,7 +387,7 @@ namespace details {
|
||||
/**
|
||||
* @brief Tag struct used to throw exception
|
||||
*/
|
||||
template<typename ExceptionType>
|
||||
template <typename ExceptionType>
|
||||
struct ThrowNow final {
|
||||
[[noreturn]] void operator<<=(const std::ostream& ostream) {
|
||||
std::ostringstream stream;
|
||||
@ -391,31 +398,32 @@ struct ThrowNow final {
|
||||
|
||||
/// @cond
|
||||
#ifndef NDEBUG
|
||||
#define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__<< ' '
|
||||
# define IE_LOCATION '\n' << __FILE__ << ':' << __LINE__ << ' '
|
||||
#else
|
||||
#define IE_LOCATION ""
|
||||
# define IE_LOCATION ""
|
||||
#endif // NDEBUG
|
||||
|
||||
|
||||
// WARNING: DO NOT USE THIS MACRO! Use openvino/pp.hpp macro library
|
||||
#define IE_PP_EXPAND(X) X
|
||||
#define IE_PP_NARG(...) IE_PP_EXPAND(IE_PP_NARG_(__VA_ARGS__, IE_PP_RSEQ_N()))
|
||||
#define IE_PP_NARG_(...) IE_PP_EXPAND(IE_PP_ARG_N(__VA_ARGS__))
|
||||
#define IE_PP_EXPAND(X) X
|
||||
#define IE_PP_NARG(...) IE_PP_EXPAND(IE_PP_NARG_(__VA_ARGS__, IE_PP_RSEQ_N()))
|
||||
#define IE_PP_NARG_(...) IE_PP_EXPAND(IE_PP_ARG_N(__VA_ARGS__))
|
||||
#define IE_PP_ARG_N(_0, _1, N, ...) N
|
||||
#define IE_PP_RSEQ_N() 0, 1, 0
|
||||
#define IE_PP_NO_ARGS(NAME) ,
|
||||
#define IE_PP_CAT3_(x, y, z) x ## y ## z
|
||||
#define IE_PP_CAT3(x, y, z) IE_PP_CAT3_(x, y, z)
|
||||
#define IE_PP_OVERLOAD(NAME, ...) IE_PP_EXPAND(IE_PP_CAT3(NAME, _, IE_PP_EXPAND(IE_PP_NARG(IE_PP_NO_ARGS __VA_ARGS__ (NAME))))(__VA_ARGS__))
|
||||
#define IE_PP_RSEQ_N() 0, 1, 0
|
||||
#define IE_PP_NO_ARGS(NAME) ,
|
||||
#define IE_PP_CAT3_(x, y, z) x##y##z
|
||||
#define IE_PP_CAT3(x, y, z) IE_PP_CAT3_(x, y, z)
|
||||
#define IE_PP_OVERLOAD(NAME, ...) \
|
||||
IE_PP_EXPAND(IE_PP_CAT3(NAME, _, IE_PP_EXPAND(IE_PP_NARG(IE_PP_NO_ARGS __VA_ARGS__(NAME))))(__VA_ARGS__))
|
||||
// ENDWARNING
|
||||
|
||||
#define IE_THROW_0() \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError> {} <<= std::stringstream {} \
|
||||
<< IE_LOCATION
|
||||
#define IE_THROW_0() \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::GeneralError>{} <<= std::stringstream{} << IE_LOCATION
|
||||
|
||||
#define IE_THROW_1(ExceptionType) \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType> {} <<= std::stringstream {} \
|
||||
<< IE_LOCATION << InferenceEngine::details::ExceptionTraits<InferenceEngine::ExceptionType>::string() << ' '
|
||||
#define IE_THROW_1(ExceptionType) \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::ExceptionType>{} <<= \
|
||||
std::stringstream{} << IE_LOCATION \
|
||||
<< InferenceEngine::details::ExceptionTraits<InferenceEngine::ExceptionType>::string() \
|
||||
<< ' '
|
||||
/// @endcond
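A minimal usage sketch of the throw helpers defined above (the variable name is illustrative):

if (buffer == nullptr)
    IE_THROW(NotAllocated) << "buffer is not allocated";  // throws InferenceEngine::NotAllocated
// IE_THROW() with no argument streams into InferenceEngine::GeneralError instead.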
|
||||
|
||||
/**
|
||||
@ -429,31 +437,35 @@ struct ThrowNow final {
|
||||
* @brief Uses assert() function if NDEBUG is not defined, InferenceEngine exception otherwise
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
#define IE_ASSERT(EXPRESSION) \
|
||||
if (!(EXPRESSION)) \
|
||||
IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION // NOLINT
|
||||
# define IE_ASSERT(EXPRESSION) \
|
||||
if (!(EXPRESSION)) \
|
||||
IE_THROW(GeneralError) << " AssertionFailed: " << #EXPRESSION // NOLINT
|
||||
#else
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
struct NullStream {
|
||||
template <typename T>
|
||||
NullStream& operator<<(const T&) noexcept {return *this;}
|
||||
NullStream& operator<<(const T&) noexcept {
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
#define IE_ASSERT(EXPRESSION) \
|
||||
assert((EXPRESSION)); \
|
||||
InferenceEngine::details::NullStream()
|
||||
# define IE_ASSERT(EXPRESSION) \
|
||||
assert((EXPRESSION)); \
|
||||
InferenceEngine::details::NullStream()
|
||||
#endif // NDEBUG
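Usage sketch (illustrative): IE_ASSERT behaves like assert() in debug builds and throws GeneralError in release builds, and extra context can be streamed after it:

IE_ASSERT(inputs.size() == 1) << "expected exactly one input, got " << inputs.size();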
|
||||
|
||||
/// @cond
|
||||
#define THROW_IE_EXCEPTION \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException> {} <<= std::stringstream {} \
|
||||
<< IE_LOCATION
|
||||
#define THROW_IE_EXCEPTION \
|
||||
InferenceEngine::details::ThrowNow<InferenceEngine::details::InferenceEngineException>{} <<= std::stringstream{} \
|
||||
<< IE_LOCATION
|
||||
|
||||
#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \
|
||||
case InferenceEngine::STATUS_CODE : { \
|
||||
using InferenceEngine::EXCEPTION_TYPE; using TYPE_ALIAS = EXCEPTION_TYPE; __VA_ARGS__; \
|
||||
#define IE_EXCEPTION_CASE(TYPE_ALIAS, STATUS_CODE, EXCEPTION_TYPE, ...) \
|
||||
case InferenceEngine::STATUS_CODE: { \
|
||||
using InferenceEngine::EXCEPTION_TYPE; \
|
||||
using TYPE_ALIAS = EXCEPTION_TYPE; \
|
||||
__VA_ARGS__; \
|
||||
} break;
|
||||
/// @endcond
|
||||
|
||||
@ -461,28 +473,29 @@ struct NullStream {
|
||||
* @def IE_EXCEPTION_SWITCH
|
||||
* @brief Generate Switch statement over error codes adn maps them to coresponding exceptions type
|
||||
*/
|
||||
#define IE_EXCEPTION_SWITCH(STATUS, TYPE_ALIAS, ...) \
|
||||
switch (STATUS) { \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, GENERAL_ERROR , GeneralError , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_IMPLEMENTED , NotImplemented , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_LOADED , NetworkNotLoaded , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, PARAMETER_MISMATCH , ParameterMismatch , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_FOUND , NotFound , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, OUT_OF_BOUNDS , OutOfBounds , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, UNEXPECTED , Unexpected , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, REQUEST_BUSY , RequestBusy , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, RESULT_NOT_READY , ResultNotReady , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_ALLOCATED , NotAllocated , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_NOT_STARTED , InferNotStarted , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_READ , NetworkNotRead , __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_CANCELLED , InferCancelled , __VA_ARGS__) \
|
||||
default: IE_ASSERT(!"Unreachable"); \
|
||||
#define IE_EXCEPTION_SWITCH(STATUS, TYPE_ALIAS, ...) \
|
||||
switch (STATUS) { \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, GENERAL_ERROR, GeneralError, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_IMPLEMENTED, NotImplemented, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_LOADED, NetworkNotLoaded, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, PARAMETER_MISMATCH, ParameterMismatch, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_FOUND, NotFound, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, OUT_OF_BOUNDS, OutOfBounds, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, UNEXPECTED, Unexpected, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, REQUEST_BUSY, RequestBusy, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, RESULT_NOT_READY, ResultNotReady, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NOT_ALLOCATED, NotAllocated, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_NOT_STARTED, InferNotStarted, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, NETWORK_NOT_READ, NetworkNotRead, __VA_ARGS__) \
|
||||
IE_EXCEPTION_CASE(TYPE_ALIAS, INFER_CANCELLED, InferCancelled, __VA_ARGS__) \
|
||||
default: \
|
||||
IE_ASSERT(!"Unreachable"); \
|
||||
}
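A hedged sketch of how this switch is typically instantiated to rethrow a StatusCode as the matching typed exception (statusCode and errorMessage are illustrative):

IE_EXCEPTION_SWITCH(statusCode, ExceptionType,
    InferenceEngine::details::ThrowNow<ExceptionType>{} <<= std::stringstream{} << IE_LOCATION << errorMessage)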
|
||||
|
||||
} // namespace details
|
||||
} // namespace InferenceEngine
|
||||
#if defined(_WIN32)
|
||||
#define __PRETTY_FUNCTION__ __FUNCSIG__
|
||||
# define __PRETTY_FUNCTION__ __FUNCSIG__
|
||||
#else
|
||||
#define __PRETTY_FUNCTION__ __PRETTY_FUNCTION__
|
||||
# define __PRETTY_FUNCTION__ __PRETTY_FUNCTION__
|
||||
#endif
|
||||
|
@ -22,7 +22,7 @@ namespace InferenceEngine {
|
||||
* Compound blob is a wrapper blob over references to underlying blobs. These blobs should share
|
||||
* some properties and can be grouped into a single entity.
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(CompoundBlob): public Blob {
|
||||
class INFERENCE_ENGINE_API_CLASS(CompoundBlob) : public Blob {
|
||||
public:
|
||||
/**
|
||||
* @brief A smart pointer to the CompoundBlob object
|
||||
@ -118,7 +118,7 @@ protected:
|
||||
/**
|
||||
* @brief Represents a blob that contains two planes (Y and UV) in NV12 color format
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(NV12Blob): public CompoundBlob {
|
||||
class INFERENCE_ENGINE_API_CLASS(NV12Blob) : public CompoundBlob {
|
||||
public:
|
||||
/**
|
||||
* @brief A smart pointer to the NV12Blob object
|
||||
@ -220,7 +220,7 @@ public:
|
||||
* Please note that reference to Blob::Ptr is returned. I.e. the reference will be valid until
|
||||
* the I420Blob object is destroyed.
|
||||
*
|
||||
* @return constant reference to shared pointer object of Y plane*
|
||||
* @return constant reference to shared pointer object of Y plane*
|
||||
*/
|
||||
const Blob::Ptr& y() const noexcept;
|
||||
|
||||
@ -273,7 +273,7 @@ public:
|
||||
* in the OPTIMIZATION_CAPABILITIES metric.
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(BatchedBlob) : public CompoundBlob {
|
||||
public:
|
||||
public:
|
||||
/**
|
||||
* @brief A smart pointer to the BatchedBlob object
|
||||
*/
|
||||
|
@ -15,11 +15,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_version.hpp"
|
||||
#include "cpp/ie_executable_network.hpp"
|
||||
#include "ie_extension.h"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
#include "cpp/ie_executable_network.hpp"
|
||||
#include "ie_version.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -106,9 +106,9 @@ public:
|
||||
* operation
|
||||
* @return An executable network reference
|
||||
*/
|
||||
ExecutableNetwork LoadNetwork(
|
||||
const CNNNetwork& network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
ExecutableNetwork LoadNetwork(const CNNNetwork& network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Reads model and creates an executable network from IR or ONNX file
|
||||
@ -123,9 +123,9 @@ public:
|
||||
*
|
||||
* @return An executable network reference
|
||||
*/
|
||||
ExecutableNetwork LoadNetwork(
|
||||
const std::string& modelPath, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
ExecutableNetwork LoadNetwork(const std::string& modelPath,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Registers extension
|
||||
@ -141,9 +141,9 @@ public:
|
||||
* operation
|
||||
* @return An executable network object
|
||||
*/
|
||||
ExecutableNetwork LoadNetwork(
|
||||
const CNNNetwork& network, RemoteContext::Ptr context,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
ExecutableNetwork LoadNetwork(const CNNNetwork& network,
|
||||
RemoteContext::Ptr context,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Registers extension for the specified plugin
|
||||
@ -162,9 +162,9 @@ public:
|
||||
* operation*
|
||||
* @return An executable network reference
|
||||
*/
|
||||
ExecutableNetwork ImportNetwork(
|
||||
const std::string& modelFileName, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
ExecutableNetwork ImportNetwork(const std::string& modelFileName,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Creates an executable network from a previously exported network
|
||||
@ -174,7 +174,8 @@ public:
|
||||
* operation*
|
||||
* @return An executable network reference
|
||||
*/
|
||||
ExecutableNetwork ImportNetwork(std::istream& networkModel, const std::string& deviceName,
|
||||
ExecutableNetwork ImportNetwork(std::istream& networkModel,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
@ -208,9 +209,9 @@ public:
|
||||
* @param config Optional map of pairs: (config parameter name, config parameter value)
|
||||
* @return An object containing a map of pairs a layer name -> a device name supporting this layer.
|
||||
*/
|
||||
QueryNetworkResult QueryNetwork(
|
||||
const CNNNetwork& network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {}) const;
|
||||
QueryNetworkResult QueryNetwork(const CNNNetwork& network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {}) const;
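Illustrative usage of the Core calls reformatted above (the device name, model path, and config key are examples, not mandated by this commit):

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");
auto supported = core.QueryNetwork(network, "CPU");  // which layers the device can run
auto execNetwork = core.LoadNetwork(network, "CPU", {{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES)}});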
|
||||
|
||||
/**
|
||||
* @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp
|
||||
|
@ -27,6 +27,7 @@ namespace InferenceEngine {
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(Data) {
|
||||
class Impl;
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief An empty constructor (dimensionless)
|
||||
@ -58,7 +59,7 @@ public:
|
||||
* @param data A data object to copy from
|
||||
* @return An assigned object
|
||||
*/
|
||||
Data & operator = (const Data& data);
|
||||
Data& operator=(const Data& data);
|
||||
|
||||
/**
|
||||
* @brief Checks if the current node is resolved
|
||||
|
@ -14,9 +14,9 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
#include "ie_iextension.h"
|
||||
#include "details/ie_so_pointer.hpp"
|
||||
#include "ie_iextension.h"
|
||||
#include "ngraph/opsets/opset.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
@ -46,9 +46,8 @@ public:
|
||||
*
|
||||
* @param name Full or relative path to extension library
|
||||
*/
|
||||
template <typename C,
|
||||
typename = details::enableIfSupportedChar<C>>
|
||||
explicit Extension(const std::basic_string<C>& name): actual(name) {}
|
||||
template <typename C, typename = details::enableIfSupportedChar<C>>
|
||||
explicit Extension(const std::basic_string<C>& name) : actual(name) {}
|
||||
|
||||
/**
|
||||
* @brief Gets the extension version information
|
||||
@ -79,7 +78,8 @@ public:
|
||||
* @return vector of strings
|
||||
*/
|
||||
std::vector<std::string> getImplTypes(const std::shared_ptr<ngraph::Node>& node) override {
|
||||
if (node == nullptr) IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
|
||||
if (node == nullptr)
|
||||
IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
|
||||
return actual->getImplTypes(node);
|
||||
}
|
||||
|
||||
@ -90,7 +90,8 @@ public:
|
||||
* @return shared pointer to implementation
|
||||
*/
|
||||
ILayerImpl::Ptr getImplementation(const std::shared_ptr<ngraph::Node>& node, const std::string& implType) override {
|
||||
if (node == nullptr) IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
|
||||
if (node == nullptr)
|
||||
IE_THROW() << "Provided ngraph::Node pointer is nullptr.";
|
||||
return actual->getImplementation(node, implType);
|
||||
}
|
||||
|
||||
@ -107,7 +108,7 @@ protected:
|
||||
* @param name extension library name
|
||||
* @return shared pointer to extension
|
||||
*/
|
||||
template<typename T = IExtension>
|
||||
template <typename T = IExtension>
|
||||
INFERENCE_ENGINE_DEPRECATED("Use std::make_shared<Extension>")
|
||||
inline std::shared_ptr<T> make_so_pointer(const std::string& name) {
|
||||
return std::make_shared<Extension>(name);
|
||||
@ -120,7 +121,7 @@ inline std::shared_ptr<T> make_so_pointer(const std::string& name) {
|
||||
* @param name extension library name
|
||||
* @return shared pointer to extension
|
||||
*/
|
||||
template<typename T = IExtension>
|
||||
template <typename T = IExtension>
|
||||
INFERENCE_ENGINE_DEPRECATED("Use std::make_shared<Extension>")
|
||||
inline std::shared_ptr<IExtension> make_so_pointer(const std::wstring& name) {
|
||||
return std::make_shared<Extension>(name);
|
||||
|
@ -17,8 +17,7 @@
|
||||
#include "ie_common.h"
|
||||
#include "ie_data.h"
|
||||
#include "ie_input_info.hpp"
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include "ngraph/function.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -29,7 +28,7 @@ _IE_SUPPRESS_DEPRECATED_START_GCC
|
||||
* @interface ICNNNetwork
|
||||
* @brief This is the main interface to describe the NN topology
|
||||
*/
|
||||
class INFERENCE_ENGINE_API_CLASS(ICNNNetwork): public std::enable_shared_from_this<ICNNNetwork> {
|
||||
class INFERENCE_ENGINE_API_CLASS(ICNNNetwork) : public std::enable_shared_from_this<ICNNNetwork> {
|
||||
public:
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
@ -127,7 +126,8 @@ public:
|
||||
* @return Status code of the operation
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead")
|
||||
virtual StatusCode addOutput(const std::string& layerName, size_t outputIndex = 0,
|
||||
virtual StatusCode addOutput(const std::string& layerName,
|
||||
size_t outputIndex = 0,
|
||||
ResponseDesc* resp = nullptr) noexcept = 0;
|
||||
|
||||
/**
|
||||
@ -219,8 +219,7 @@ public:
|
||||
* @return Status code of the operation
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead")
|
||||
virtual StatusCode serialize(std::ostream& xmlStream, Blob::Ptr& binData, ResponseDesc* resp) const
|
||||
noexcept = 0;
|
||||
virtual StatusCode serialize(std::ostream& xmlStream, Blob::Ptr& binData, ResponseDesc* resp) const noexcept = 0;
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::CNNNetwork wrapper instead
|
||||
@ -233,10 +232,11 @@ public:
|
||||
* @return Status code of the operation
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::CNNNetwork wrapper instead")
|
||||
virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept {
|
||||
(void) ov_name;
|
||||
(void) orig_name;
|
||||
(void) resp;
|
||||
virtual StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const
|
||||
noexcept {
|
||||
(void)ov_name;
|
||||
(void)orig_name;
|
||||
(void)resp;
|
||||
return NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
@ -9,9 +9,9 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <ostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
@ -15,20 +15,20 @@
|
||||
#include <vector>
|
||||
|
||||
#include "ie_api.h"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_common.h"
|
||||
#include "ie_layouts.h"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_version.hpp"
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
#include "ngraph/opsets/opset.hpp"
|
||||
|
||||
/**
|
||||
* @def INFERENCE_EXTENSION_API(TYPE)
|
||||
* @brief Defines Inference Engine Extension API method
|
||||
*/
|
||||
#if defined(_WIN32) && defined(IMPLEMENT_INFERENCE_EXTENSION_API)
|
||||
#define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE
|
||||
# define INFERENCE_EXTENSION_API(TYPE) extern "C" __declspec(dllexport) TYPE
|
||||
#else
|
||||
#define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
|
||||
# define INFERENCE_EXTENSION_API(TYPE) INFERENCE_ENGINE_API(TYPE)
|
||||
#endif
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -131,7 +131,8 @@ public:
|
||||
* @param resp Response descriptor
|
||||
* @return Status code
|
||||
*/
|
||||
virtual StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
|
||||
virtual StatusCode execute(std::vector<Blob::Ptr>& inputs,
|
||||
std::vector<Blob::Ptr>& outputs,
|
||||
ResponseDesc* resp) noexcept = 0;
|
||||
};
|
||||
|
||||
@ -183,7 +184,8 @@ public:
|
||||
/**
|
||||
* @brief Implements deprecated API
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Do not override or use this method. Use IE_DEFINE_EXTENSION_CREATE_FUNCTION to export extension")
|
||||
INFERENCE_ENGINE_DEPRECATED(
|
||||
"Do not override or use this method. Use IE_DEFINE_EXTENSION_CREATE_FUNCTION to export extension")
|
||||
virtual void Release() noexcept {
|
||||
delete this;
|
||||
}
|
||||
@ -217,15 +219,17 @@ INFERENCE_EXTENSION_API(StatusCode)
|
||||
CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept;
|
||||
#else
|
||||
INFERENCE_EXTENSION_API(StatusCode)
|
||||
CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept INFERENCE_ENGINE_DEPRECATED("Use IE_DEFINE_EXTENSION_CREATE_FUNCTION macro");
|
||||
CreateExtension(IExtension*& ext, ResponseDesc* resp) noexcept INFERENCE_ENGINE_DEPRECATED(
|
||||
"Use IE_DEFINE_EXTENSION_CREATE_FUNCTION macro");
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def IE_DEFINE_EXTENSION_CREATE_FUNCTION
|
||||
* @brief Generates extension creation function
|
||||
*/
|
||||
#define IE_DEFINE_EXTENSION_CREATE_FUNCTION(ExtensionType) \
|
||||
INFERENCE_EXTENSION_API(void) InferenceEngine::CreateExtensionShared(std::shared_ptr<InferenceEngine::IExtension>& ext) { \
|
||||
ext = std::make_shared<ExtensionType>(); \
|
||||
}
|
||||
#define IE_DEFINE_EXTENSION_CREATE_FUNCTION(ExtensionType) \
|
||||
INFERENCE_EXTENSION_API(void) \
|
||||
InferenceEngine::CreateExtensionShared(std::shared_ptr<InferenceEngine::IExtension>& ext) { \
|
||||
ext = std::make_shared<ExtensionType>(); \
|
||||
}
|
||||
} // namespace InferenceEngine
|
||||
|
@ -26,7 +26,8 @@ _IE_SUPPRESS_DEPRECATED_START_GCC
|
||||
* @deprecated Use InferenceEngine::InferRequest C++ wrapper
|
||||
* @brief This is an interface of asynchronous infer request
|
||||
*/
|
||||
class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::InferRequest C++ wrapper") IInferRequest : public std::enable_shared_from_this<IInferRequest> {
|
||||
class INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::InferRequest C++ wrapper") IInferRequest
|
||||
: public std::enable_shared_from_this<IInferRequest> {
|
||||
public:
|
||||
/**
|
||||
* @enum WaitMode
|
||||
@ -83,7 +84,10 @@ public:
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: OK (0) for success
|
||||
*/
|
||||
virtual StatusCode SetBlob(const char *name, const Blob::Ptr &data, const PreProcessInfo& info, ResponseDesc *resp) noexcept = 0;
|
||||
virtual StatusCode SetBlob(const char* name,
|
||||
const Blob::Ptr& data,
|
||||
const PreProcessInfo& info,
|
||||
ResponseDesc* resp) noexcept = 0;
|
||||
|
||||
/**
|
||||
* @brief Gets pre-process for input data
|
||||
@ -92,7 +96,8 @@ public:
|
||||
* @param resp Optional: pointer to an already allocated object to contain information in case of failure
|
||||
* @return Status code of the operation: OK (0) for success
|
||||
*/
|
||||
virtual StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc *resp) const noexcept = 0;
|
||||
virtual StatusCode GetPreProcess(const char* name, const PreProcessInfo** info, ResponseDesc* resp) const
|
||||
noexcept = 0;
|
||||
/**
|
||||
* @brief Infers specified input(s) in synchronous mode
|
||||
*
|
||||
@ -200,4 +205,4 @@ protected:
|
||||
|
||||
_IE_SUPPRESS_DEPRECATED_END_GCC
|
||||
|
||||
} // namespace InferenceEngine
|
||||
} // namespace InferenceEngine
|
||||
|
@ -66,8 +66,11 @@ public:
|
||||
* @param dimOffsets per-dimension offset from the padding to actual data,
|
||||
* @param strides strides for each dimension
|
||||
*/
|
||||
BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset,
|
||||
const SizeVector& dimOffsets, const SizeVector& strides);
|
||||
BlockingDesc(const SizeVector& blocked_dims,
|
||||
const SizeVector& order,
|
||||
size_t offset,
|
||||
const SizeVector& dimOffsets,
|
||||
const SizeVector& strides);
|
||||
|
||||
/**
|
||||
* @brief Returns the blocked dimensions vector
|
||||
@ -335,11 +338,11 @@ private:
|
||||
* @brief This structure describes ROI data for image-like tensors.
|
||||
*/
|
||||
struct ROI {
|
||||
size_t id = 0; //!< ID of a ROI (offset over batch dimension)
|
||||
size_t posX = 0; //!< W upper left coordinate of ROI
|
||||
size_t posY = 0; //!< H upper left coordinate of ROI
|
||||
size_t sizeX = 0; //!< W size of ROI
|
||||
size_t sizeY = 0; //!< H size of ROI
|
||||
size_t id = 0; //!< ID of a ROI (offset over batch dimension)
|
||||
size_t posX = 0; //!< W upper left coordinate of ROI
|
||||
size_t posY = 0; //!< H upper left coordinate of ROI
|
||||
size_t sizeX = 0; //!< W size of ROI
|
||||
size_t sizeY = 0; //!< H size of ROI
|
||||
|
||||
ROI() = default;
|
||||
|
||||
@ -351,9 +354,12 @@ struct ROI {
|
||||
* @param sizeX W size of ROI
|
||||
* @param sizeY H size of ROI
|
||||
*/
|
||||
ROI(size_t id, size_t posX, size_t posY, size_t sizeX, size_t sizeY) :
|
||||
id(id), posX(posX), posY(posY), sizeX(sizeX), sizeY(sizeY) {
|
||||
}
|
||||
ROI(size_t id, size_t posX, size_t posY, size_t sizeX, size_t sizeY)
|
||||
: id(id),
|
||||
posX(posX),
|
||||
posY(posY),
|
||||
sizeX(sizeX),
|
||||
sizeY(sizeY) {}
|
||||
};
|
||||
|
||||
/**
|
||||
@ -366,9 +372,6 @@ struct ROI {
|
||||
*
|
||||
* @return A newly created TensorDesc object representing ROI.
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(TensorDesc) make_roi_desc(
|
||||
const TensorDesc& origDesc,
|
||||
const ROI& roi,
|
||||
bool useOrigMemDesc);
|
||||
INFERENCE_ENGINE_API_CPP(TensorDesc) make_roi_desc(const TensorDesc& origDesc, const ROI& roi, bool useOrigMemDesc);
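Illustrative only: building a ROI view descriptor over an existing blob's layout with the helper declared above (blob is an assumed, already-allocated MemoryBlob):

InferenceEngine::ROI roi(0, 10, 20, 100, 100);  // batch id 0, 100x100 region at (10, 20)
InferenceEngine::TensorDesc roiDesc = InferenceEngine::make_roi_desc(blob->getTensorDesc(), roi, true);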
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -43,7 +43,10 @@ public:
|
||||
* @param offsetInBytes Offset in originally locked region
|
||||
*/
|
||||
LockedMemoryBase(IAllocator* ptr, void* handle, LockOp lockFlag, size_t offsetInBytes)
|
||||
: _allocator(ptr), _handle(handle), _lockFlag(lockFlag), _offset(offsetInBytes) {}
|
||||
: _allocator(ptr),
|
||||
_handle(handle),
|
||||
_lockFlag(lockFlag),
|
||||
_offset(offsetInBytes) {}
|
||||
|
||||
/**
|
||||
* @brief A copy constructor
|
||||
@ -51,7 +54,10 @@ public:
|
||||
* @param that An rvalue reference for the other LockedMemoryBase instance
|
||||
*/
|
||||
LockedMemoryBase(LockedMemoryBase&& that) noexcept
|
||||
: _allocator(that._allocator), _handle(that._handle), _lockFlag(that._lockFlag), _offset(that._offset) {
|
||||
: _allocator(that._allocator),
|
||||
_handle(that._handle),
|
||||
_lockFlag(that._lockFlag),
|
||||
_offset(that._offset) {
|
||||
that._locked = nullptr;
|
||||
}
|
||||
|
||||
@ -86,7 +92,8 @@ protected:
|
||||
* @return The pointer to the locked object, nullptr otherwise
|
||||
*/
|
||||
virtual T* dereference() const {
|
||||
if (_locked != nullptr) return _locked;
|
||||
if (_locked != nullptr)
|
||||
return _locked;
|
||||
|
||||
if (_allocator == nullptr) {
|
||||
return nullptr;
|
||||
@ -134,7 +141,7 @@ public:
|
||||
* @param that Rvalue reference for the other LockedMemoryBase instance
|
||||
* @param offset Offset value
|
||||
*/
|
||||
LockedMemory(LockedMemory<T>&& that, size_t offset): base(std::move(that)) {
|
||||
LockedMemory(LockedMemory<T>&& that, size_t offset) : base(std::move(that)) {
|
||||
base::_offset = offset;
|
||||
}
|
||||
|
||||
@ -242,7 +249,7 @@ public:
|
||||
* @param that Rvalue reference for the other LockedMemoryBase instance
|
||||
* @param offset Offset value
|
||||
*/
|
||||
LockedMemory(LockedMemory<void>&& that, size_t offset): base(std::move(that)) {
|
||||
LockedMemory(LockedMemory<void>&& that, size_t offset) : base(std::move(that)) {
|
||||
base::_offset = offset;
|
||||
}
|
||||
|
||||
@ -326,7 +333,7 @@ public:
|
||||
* @param handle Handle provided by allocator
|
||||
* @param offset Offset in bytes in originally locked region
|
||||
*/
|
||||
LockedMemory(IAllocator* ptr, void* handle, size_t offset): base(ptr, handle, LOCK_FOR_READ, offset) {}
|
||||
LockedMemory(IAllocator* ptr, void* handle, size_t offset) : base(ptr, handle, LOCK_FOR_READ, offset) {}
|
||||
|
||||
/**
|
||||
* @brief A default copy constructor that accepts rvalue
|
||||
@ -341,7 +348,7 @@ public:
|
||||
* @param that Rvalue reference for the other LockedMemoryBase instance
|
||||
* @param offset Offset value
|
||||
*/
|
||||
LockedMemory(LockedMemory<const T>&& that, size_t offset): base(std::move(that)) {
|
||||
LockedMemory(LockedMemory<const T>&& that, size_t offset) : base(std::move(that)) {
|
||||
base::_offset = offset;
|
||||
}
|
||||
|
||||
|
@ -17,33 +17,33 @@
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#define IE_THREAD_TBB 0
|
||||
#define IE_THREAD_OMP 1
|
||||
#define IE_THREAD_SEQ 2
|
||||
#define IE_THREAD_TBB 0
|
||||
#define IE_THREAD_OMP 1
|
||||
#define IE_THREAD_SEQ 2
|
||||
#define IE_THREAD_TBB_AUTO 3
|
||||
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
#endif
|
||||
#ifndef TBB_PREVIEW_LOCAL_OBSERVER
|
||||
# define TBB_PREVIEW_LOCAL_OBSERVER 1
|
||||
#endif
|
||||
#ifndef TBB_PREVIEW_NUMA_SUPPORT
|
||||
# define TBB_PREVIEW_NUMA_SUPPORT 1
|
||||
#endif
|
||||
#ifndef TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION
|
||||
# define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1
|
||||
#endif
|
||||
# ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
# endif
|
||||
# ifndef TBB_PREVIEW_LOCAL_OBSERVER
|
||||
# define TBB_PREVIEW_LOCAL_OBSERVER 1
|
||||
# endif
|
||||
# ifndef TBB_PREVIEW_NUMA_SUPPORT
|
||||
# define TBB_PREVIEW_NUMA_SUPPORT 1
|
||||
# endif
|
||||
# ifndef TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION
|
||||
# define TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION 1
|
||||
# endif
|
||||
|
||||
#include "tbb/blocked_range.h"
|
||||
#include "tbb/blocked_range2d.h"
|
||||
#include "tbb/blocked_range3d.h"
|
||||
#include "tbb/parallel_for.h"
|
||||
#include "tbb/parallel_reduce.h"
|
||||
#include "tbb/parallel_sort.h"
|
||||
#include "tbb/task_arena.h"
|
||||
#include "tbb/task_scheduler_observer.h"
|
||||
# include "tbb/blocked_range.h"
|
||||
# include "tbb/blocked_range2d.h"
|
||||
# include "tbb/blocked_range3d.h"
|
||||
# include "tbb/parallel_for.h"
|
||||
# include "tbb/parallel_reduce.h"
|
||||
# include "tbb/parallel_sort.h"
|
||||
# include "tbb/task_arena.h"
|
||||
# include "tbb/task_scheduler_observer.h"
|
||||
|
||||
inline int parallel_get_max_threads() {
|
||||
return tbb::this_task_arena::max_concurrency();
|
||||
@ -60,31 +60,31 @@ inline void parallel_set_num_threads(int) {
|
||||
inline int parallel_get_env_threads() {
|
||||
return 0;
|
||||
}
|
||||
#if IE_THREAD == IE_THREAD_TBB
|
||||
#define PARTITIONING , tbb::static_partitioner()
|
||||
# if IE_THREAD == IE_THREAD_TBB
|
||||
# define PARTITIONING , tbb::static_partitioner()
|
||||
|
||||
// The TBB version less than 2018u1 has no static_partitioner argument for
|
||||
// tbb::parallel_deterministic_reduce. So will fallback to non deterministic version.
|
||||
#if (TBB_INTERFACE_VERSION >= 10001)
|
||||
#define _TBB_REDUCE_FUNC tbb::parallel_deterministic_reduce
|
||||
#else
|
||||
#define _TBB_REDUCE_FUNC tbb::parallel_reduce
|
||||
#endif
|
||||
# if (TBB_INTERFACE_VERSION >= 10001)
|
||||
# define _TBB_REDUCE_FUNC tbb::parallel_deterministic_reduce
|
||||
# else
|
||||
# define _TBB_REDUCE_FUNC tbb::parallel_reduce
|
||||
# endif
|
||||
|
||||
#else
|
||||
#define PARTITIONING
|
||||
#endif
|
||||
# else
|
||||
# define PARTITIONING
|
||||
# endif
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
#include <omp.h>
|
||||
# include <omp.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
# include <algorithm>
|
||||
# include <cstdlib>
|
||||
# include <string>
|
||||
|
||||
/* MSVC still supports omp 2.0 only */
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
#define collapse(x)
|
||||
#endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
# if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
# define collapse(x)
|
||||
# endif // defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||
inline int parallel_get_max_threads() {
|
||||
return omp_get_max_threads();
|
||||
}
|
||||
@ -110,7 +110,7 @@ inline int parallel_get_env_threads() {
|
||||
}
|
||||
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
#include <algorithm> // NOLINT
|
||||
# include <algorithm> // NOLINT
|
||||
inline int parallel_get_env_threads() {
|
||||
return 1;
|
||||
}
|
||||
@ -133,7 +133,8 @@ namespace InferenceEngine {
|
||||
template <typename F>
|
||||
void parallel_nt(int nthr, const F& func) {
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
if (nthr == 0) nthr = parallel_get_max_threads();
|
||||
if (nthr == 0)
|
||||
nthr = parallel_get_max_threads();
|
||||
if (nthr == 1) {
|
||||
func(0, 1);
|
||||
return;
|
||||
@ -148,7 +149,7 @@ void parallel_nt(int nthr, const F& func) {
|
||||
return;
|
||||
}
|
||||
|
||||
#pragma omp parallel num_threads(nthr)
|
||||
# pragma omp parallel num_threads(nthr)
|
||||
func(parallel_get_thread_num(), parallel_get_num_threads());
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
func(0, 1);
|
||||
@ -168,18 +169,20 @@ void parallel_nt_static(int nthr, const F& func) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (nthr == 0) nthr = parallel_get_max_threads();
|
||||
if (nthr == 0)
|
||||
nthr = parallel_get_max_threads();
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
tbb::parallel_for(
|
||||
0, nthr,
|
||||
0,
|
||||
nthr,
|
||||
[&](int ithr) {
|
||||
func(ithr, nthr);
|
||||
},
|
||||
tbb::static_partitioner {});
|
||||
tbb::static_partitioner{});
|
||||
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
|
||||
#pragma omp parallel num_threads(nthr)
|
||||
# pragma omp parallel num_threads(nthr)
|
||||
{ func(parallel_get_thread_num(), parallel_get_num_threads()); }
|
||||
#endif
|
||||
}
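A minimal sketch (assuming the header is included and IE_THREAD is configured) of the thread-team helper reformatted above:

InferenceEngine::parallel_nt(0, [&](int ithr, int nthr) {  // nthr == 0 -> use parallel_get_max_threads()
    // each of the nthr workers handles its own slice, selected by ithr
});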
|
||||
@ -200,10 +203,12 @@ template <typename T0, typename R, typename F>
|
||||
R parallel_sum(const T0& D0, const R& input, const F& func) {
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
return _TBB_REDUCE_FUNC(
|
||||
tbb::blocked_range<T0>(0, D0), input,
|
||||
tbb::blocked_range<T0>(0, D0),
|
||||
input,
|
||||
[&](const tbb::blocked_range<T0>& r, R init) -> R {
|
||||
R sum = init;
|
||||
for (T0 dim1 = r.begin(); dim1 < r.end(); ++dim1) sum += func(dim1);
|
||||
for (T0 dim1 = r.begin(); dim1 < r.end(); ++dim1)
|
||||
sum += func(dim1);
|
||||
return sum;
|
||||
},
|
||||
[](R x, R y) -> R {
|
||||
@ -212,15 +217,15 @@ R parallel_sum(const T0& D0, const R& input, const F& func) {
|
||||
#else
|
||||
R sum = input;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# ifdef _MSC_VER
|
||||
using T0_IT = typename std::make_signed<T0>::type;
|
||||
#else
|
||||
# else
|
||||
using T0_IT = T0;
|
||||
#endif
|
||||
# endif
|
||||
|
||||
#if IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel for reduction(+ : sum) schedule(static)
|
||||
#endif
|
||||
# if IE_THREAD == IE_THREAD_OMP
|
||||
# pragma omp parallel for reduction(+ : sum) schedule(static)
|
||||
# endif
|
||||
for (T0_IT dim1 = 0; dim1 < static_cast<T0_IT>(D0); dim1++) {
|
||||
sum += static_cast<R>(func(dim1));
|
||||
}
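Usage sketch for the reduction helper (data and n are illustrative):

float total = InferenceEngine::parallel_sum(n, 0.0f, [&](size_t i) {
    return data[i];  // partial sums are combined across the worker threads
});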
|
||||
@ -232,7 +237,8 @@ template <typename T0, typename T1, typename R, typename F>
|
||||
R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
return _TBB_REDUCE_FUNC(
|
||||
tbb::blocked_range2d<T0, T1>(0, D0, 0, D1), input,
|
||||
tbb::blocked_range2d<T0, T1>(0, D0, 0, D1),
|
||||
input,
|
||||
[&](const tbb::blocked_range2d<T0, T1>& r, R init) -> R {
|
||||
R sum = init;
|
||||
for (T0 dim2 = r.rows().begin(); dim2 < r.rows().end(); dim2++) {
|
||||
@ -248,17 +254,17 @@ R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
|
||||
#else
|
||||
R sum = input;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# ifdef _MSC_VER
|
||||
using T0_IT = typename std::make_signed<T0>::type;
|
||||
using T1_IT = typename std::make_signed<T1>::type;
|
||||
#else
|
||||
# else
|
||||
using T0_IT = T0;
|
||||
using T1_IT = T1;
|
||||
#endif
|
||||
# endif
|
||||
|
||||
#if IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel for collapse(2) reduction(+ : sum) schedule(static)
|
||||
#endif
|
||||
# if IE_THREAD == IE_THREAD_OMP
|
||||
# pragma omp parallel for collapse(2) reduction(+ : sum) schedule(static)
|
||||
# endif
|
||||
for (T0_IT dim2 = 0; dim2 < D0; dim2++) {
|
||||
for (T1_IT dim1 = 0; dim1 < D1; dim1++) {
|
||||
sum += func(dim2, dim1);
|
||||
@ -271,7 +277,8 @@ template <typename T0, typename T1, typename T2, typename R, typename F>
|
||||
R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const F& func) {
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
return _TBB_REDUCE_FUNC(
|
||||
tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2), input,
|
||||
tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2),
|
||||
input,
|
||||
[&](const tbb::blocked_range3d<T0, T1, T2>& r, R init) -> R {
|
||||
R sum = init;
|
||||
for (T0 dim1 = r.pages().begin(); dim1 < r.pages().end(); dim1++) {
|
||||
@ -289,19 +296,19 @@ R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const
|
||||
#else
|
||||
R sum = input;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# ifdef _MSC_VER
|
||||
using T0_IT = typename std::make_signed<T0>::type;
|
||||
using T1_IT = typename std::make_signed<T1>::type;
|
||||
using T2_IT = typename std::make_signed<T2>::type;
|
||||
#else
|
||||
# else
|
||||
using T0_IT = T0;
|
||||
using T1_IT = T1;
|
||||
using T2_IT = T2;
|
||||
#endif
|
||||
# endif
|
||||
|
||||
#if IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static)
|
||||
#endif
|
||||
# if IE_THREAD == IE_THREAD_OMP
|
||||
# pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static)
|
||||
# endif
|
||||
for (T0_IT dim1 = 0; dim1 < static_cast<T0_IT>(D0); dim1++) {
|
||||
for (T1_IT dim2 = 0; dim2 < static_cast<T1_IT>(D1); dim2++) {
|
||||
for (T2_IT dim3 = 0; dim3 < static_cast<T2_IT>(D2); dim3++) {
|
||||
@ -353,31 +360,28 @@ inline void splitter(const T& n, const Q& team, const Q& tid, T& n_start, T& n_e
|
||||
}
|
||||
|
||||
namespace details {
template<typename T>
struct num_of_lambda_args : public num_of_lambda_args<decltype(&T::operator())> {
};
template <typename T>
struct num_of_lambda_args : public num_of_lambda_args<decltype(&T::operator())> {};

template<typename C, typename R, typename... Args>
struct num_of_lambda_args<R(C::*)(Args...) const> {
constexpr static int value = sizeof...(Args);
};
template <typename C, typename R, typename... Args>
struct num_of_lambda_args<R (C::*)(Args...) const> {
constexpr static int value = sizeof...(Args);
};

template<typename ACT, typename ...T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T) + 1, void>::type
call_with_args(ACT body, size_t g_id, T ...arg) {
body(g_id, arg...);
}
template <typename ACT, typename... T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T) + 1, void>::type call_with_args(ACT body, size_t g_id, T... arg) {
body(g_id, arg...);
}

template<typename ACT, typename ...T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T), void>::type
call_with_args(ACT body, size_t g_id, T ...arg) {
body(arg...);
}
template <typename ACT, typename... T, size_t N_ARGS = num_of_lambda_args<ACT>::value>
typename std::enable_if<N_ARGS == sizeof...(T), void>::type call_with_args(ACT body, size_t g_id, T... arg) {
body(arg...);
}
} // namespace details
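The details::num_of_lambda_args / call_with_args pair above is what lets the same for_1d/for_2d loops accept callables both with and without a leading thread-id parameter. A standalone sketch of the idea (simplified names, not the exact header code):

#include <cstddef>
#include <iostream>
#include <type_traits>

template <typename T>
struct arity : arity<decltype(&T::operator())> {};
template <typename C, typename R, typename... Args>
struct arity<R (C::*)(Args...) const> {
    constexpr static int value = sizeof...(Args);
};

// Forwards the thread id only when the callable declares one extra leading parameter.
template <typename F, typename... T, size_t N = arity<F>::value>
typename std::enable_if<N == sizeof...(T) + 1>::type call(F f, size_t thread_id, T... idx) {
    f(thread_id, idx...);
}
template <typename F, typename... T, size_t N = arity<F>::value>
typename std::enable_if<N == sizeof...(T)>::type call(F f, size_t thread_id, T... idx) {
    f(idx...);
}

int main() {
    call([](size_t i) { std::cout << "no tid: " << i << "\n"; }, 0, size_t{5});
    call([](size_t tid, size_t i) { std::cout << tid << ": " << i << "\n"; }, 0, size_t{5});
}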
|
||||
template <typename T0, typename F>
|
||||
void for_1d(const int& ithr, const int& nthr, const T0& D0, const F& func) {
|
||||
T0 d0 {0}, end {0};
|
||||
T0 d0{0}, end{0};
|
||||
splitter(D0, nthr, ithr, d0, end);
|
||||
for (; d0 < end; ++d0)
|
||||
details::call_with_args(func, ithr, d0);
|
||||
@ -388,12 +392,14 @@ void parallel_for(const T0& D0, const F& func) {
|
||||
#if IE_THREAD == IE_THREAD_TBB
|
||||
auto work_amount = static_cast<size_t>(D0);
|
||||
int nthr = parallel_get_max_threads();
|
||||
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
|
||||
if (static_cast<size_t>(nthr) > work_amount)
|
||||
nthr = static_cast<int>(work_amount);
|
||||
if (nthr == 1) {
|
||||
for_1d(0, 1, D0, func);
|
||||
} else {
|
||||
tbb::parallel_for(
|
||||
0, nthr,
|
||||
0,
|
||||
nthr,
|
||||
[&](int ithr) {
|
||||
for_1d(ithr, nthr, D0, func);
|
||||
},
|
||||
@ -405,7 +411,7 @@ void parallel_for(const T0& D0, const F& func) {
|
||||
for_1d(ithr, nthr, D0, func);
|
||||
});
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel
|
||||
# pragma omp parallel
|
||||
for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func);
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
for_1d(0, 1, D0, func);
|
||||
@ -415,12 +421,13 @@ void parallel_for(const T0& D0, const F& func) {
|
||||
template <typename T0, typename T1, typename F>
|
||||
void for_2d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const F& func) {
|
||||
const size_t work_amount = (size_t)D0 * D1;
|
||||
if (work_amount == 0) return;
|
||||
size_t start {0}, end {0};
|
||||
if (work_amount == 0)
|
||||
return;
|
||||
size_t start{0}, end{0};
|
||||
splitter(work_amount, nthr, ithr, start, end);
|
||||
|
||||
T0 d0 {0};
|
||||
T1 d1 {0};
|
||||
T0 d0{0};
|
||||
T1 d1{0};
|
||||
parallel_it_init(start, d0, D0, d1, D1);
|
||||
for (size_t iwork = start; iwork < end; ++iwork) {
|
||||
details::call_with_args(func, ithr, d0, d1);
|
||||
@ -433,12 +440,14 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
|
||||
#if IE_THREAD == IE_THREAD_TBB
|
||||
auto work_amount = static_cast<size_t>(D0 * D1);
|
||||
int nthr = parallel_get_max_threads();
|
||||
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
|
||||
if (static_cast<size_t>(nthr) > work_amount)
|
||||
nthr = static_cast<int>(work_amount);
|
||||
if (nthr == 1) {
|
||||
for_2d(0, 1, D0, D1, func);
|
||||
} else {
|
||||
tbb::parallel_for(
|
||||
0, nthr,
|
||||
0,
|
||||
nthr,
|
||||
[&](int ithr) {
|
||||
for_2d(ithr, nthr, D0, D1, func);
|
||||
},
|
||||
@ -450,7 +459,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
|
||||
for_2d(ithr, nthr, D0, D1, func);
|
||||
});
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel
|
||||
# pragma omp parallel
|
||||
for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func);
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
for_2d(0, 1, D0, D1, func);
|
||||
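A short usage sketch of the 2D variant reformatted here (namespace placement assumed as in this header); the functor may optionally take the thread id first, which is exactly what details::call_with_args above enables:

#include <cstddef>
#include "ie_parallel.hpp"

// Fills dst[h * W + w] with its flat index; iterations are split across threads by splitter().
void fill_index_map(float* dst, size_t H, size_t W) {
    InferenceEngine::parallel_for2d(H, W, [&](size_t h, size_t w) {
        dst[h * W + w] = static_cast<float>(h * W + w);
    });
}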
@ -460,13 +469,14 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
|
||||
template <typename T0, typename T1, typename T2, typename F>
|
||||
void for_3d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const F& func) {
|
||||
const size_t work_amount = (size_t)D0 * D1 * D2;
|
||||
if (work_amount == 0) return;
|
||||
size_t start {0}, end {0};
|
||||
if (work_amount == 0)
|
||||
return;
|
||||
size_t start{0}, end{0};
|
||||
splitter(work_amount, nthr, ithr, start, end);
|
||||
|
||||
T0 d0 {0};
|
||||
T1 d1 {0};
|
||||
T2 d2 {0};
|
||||
T0 d0{0};
|
||||
T1 d1{0};
|
||||
T2 d2{0};
|
||||
parallel_it_init(start, d0, D0, d1, D1, d2, D2);
|
||||
for (size_t iwork = start; iwork < end; ++iwork) {
|
||||
details::call_with_args(func, ithr, d0, d1, d2);
|
||||
@ -479,12 +489,14 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
|
||||
#if IE_THREAD == IE_THREAD_TBB
|
||||
auto work_amount = static_cast<size_t>(D0 * D1 * D2);
|
||||
int nthr = parallel_get_max_threads();
|
||||
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
|
||||
if (static_cast<size_t>(nthr) > work_amount)
|
||||
nthr = static_cast<int>(work_amount);
|
||||
if (nthr == 1) {
|
||||
for_3d(0, 1, D0, D1, D2, func);
|
||||
} else {
|
||||
tbb::parallel_for(
|
||||
0, nthr,
|
||||
0,
|
||||
nthr,
|
||||
[&](int ithr) {
|
||||
for_3d(ithr, nthr, D0, D1, D2, func);
|
||||
},
|
||||
@ -496,7 +508,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
|
||||
for_3d(ithr, nthr, D0, D1, D2, func);
|
||||
});
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel
|
||||
# pragma omp parallel
|
||||
for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func);
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
for_3d(0, 1, D0, D1, D2, func);
|
||||
@ -506,14 +518,15 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename F>
|
||||
void for_4d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const T3& D3, const F& func) {
|
||||
const size_t work_amount = (size_t)D0 * D1 * D2 * D3;
|
||||
if (work_amount == 0) return;
|
||||
size_t start {0}, end {0};
|
||||
if (work_amount == 0)
|
||||
return;
|
||||
size_t start{0}, end{0};
|
||||
splitter(work_amount, nthr, ithr, start, end);
|
||||
|
||||
T0 d0 {0};
|
||||
T1 d1 {0};
|
||||
T2 d2 {0};
|
||||
T3 d3 {0};
|
||||
T0 d0{0};
|
||||
T1 d1{0};
|
||||
T2 d2{0};
|
||||
T3 d3{0};
|
||||
parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3);
|
||||
for (size_t iwork = start; iwork < end; ++iwork) {
|
||||
details::call_with_args(func, ithr, d0, d1, d2, d3);
|
||||
@ -526,12 +539,14 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
|
||||
#if IE_THREAD == IE_THREAD_TBB
|
||||
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3);
|
||||
int nthr = parallel_get_max_threads();
|
||||
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
|
||||
if (static_cast<size_t>(nthr) > work_amount)
|
||||
nthr = static_cast<int>(work_amount);
|
||||
if (nthr == 1) {
|
||||
for_4d(0, 1, D0, D1, D2, D3, func);
|
||||
} else {
|
||||
tbb::parallel_for(
|
||||
0, nthr,
|
||||
0,
|
||||
nthr,
|
||||
[&](int ithr) {
|
||||
for_4d(ithr, nthr, D0, D1, D2, D3, func);
|
||||
},
|
||||
@ -543,7 +558,7 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
|
||||
for_4d(ithr, nthr, D0, D1, D2, D3, func);
|
||||
});
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel
|
||||
# pragma omp parallel
|
||||
for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func);
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
for_4d(0, 1, D0, D1, D2, D3, func);
|
||||
@ -551,18 +566,25 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
|
||||
}
|
||||
|
||||
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename F>
|
||||
void for_5d(const int& ithr, const int& nthr, const T0& D0, const T1& D1, const T2& D2, const T3& D3, const T4& D4,
|
||||
void for_5d(const int& ithr,
|
||||
const int& nthr,
|
||||
const T0& D0,
|
||||
const T1& D1,
|
||||
const T2& D2,
|
||||
const T3& D3,
|
||||
const T4& D4,
|
||||
const F& func) {
|
||||
const size_t work_amount = (size_t)D0 * D1 * D2 * D3 * D4;
|
||||
if (work_amount == 0) return;
|
||||
size_t start {0}, end {0};
|
||||
if (work_amount == 0)
|
||||
return;
|
||||
size_t start{0}, end{0};
|
||||
splitter(work_amount, nthr, ithr, start, end);
|
||||
|
||||
T0 d0 {0};
|
||||
T1 d1 {0};
|
||||
T2 d2 {0};
|
||||
T3 d3 {0};
|
||||
T4 d4 {0};
|
||||
T0 d0{0};
|
||||
T1 d1{0};
|
||||
T2 d2{0};
|
||||
T3 d3{0};
|
||||
T4 d4{0};
|
||||
parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3, d4, D4);
|
||||
for (size_t iwork = start; iwork < end; ++iwork) {
|
||||
details::call_with_args(func, ithr, d0, d1, d2, d3, d4);
|
||||
@ -575,12 +597,14 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
|
||||
#if IE_THREAD == IE_THREAD_TBB
|
||||
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3 * D4);
|
||||
int nthr = parallel_get_max_threads();
|
||||
if (static_cast<size_t>(nthr) > work_amount) nthr = static_cast<int>(work_amount);
|
||||
if (static_cast<size_t>(nthr) > work_amount)
|
||||
nthr = static_cast<int>(work_amount);
|
||||
if (nthr == 1) {
|
||||
for_5d(0, 1, D0, D1, D2, D3, D4, func);
|
||||
} else {
|
||||
tbb::parallel_for(
|
||||
0, nthr,
|
||||
0,
|
||||
nthr,
|
||||
[&](int ithr) {
|
||||
for_5d(ithr, nthr, D0, D1, D2, D3, D4, func);
|
||||
},
|
||||
@ -592,7 +616,7 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
|
||||
for_5d(ithr, nthr, D0, D1, D2, D3, D4, func);
|
||||
});
|
||||
#elif IE_THREAD == IE_THREAD_OMP
|
||||
#pragma omp parallel
|
||||
# pragma omp parallel
|
||||
for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func);
|
||||
#elif IE_THREAD == IE_THREAD_SEQ
|
||||
for_5d(0, 1, D0, D1, D2, D3, D4, func);
|
||||
|
@ -72,7 +72,7 @@ public:
|
||||
*
|
||||
* @param str char array
|
||||
*/
|
||||
Parameter(const char* str): Parameter(std::string(str)) {} // NOLINT
|
||||
Parameter(const char* str) : Parameter(std::string(str)) {} // NOLINT
|
||||
|
||||
/**
|
||||
* @brief Destructor
|
||||
@ -89,7 +89,8 @@ public:
|
||||
return *this;
|
||||
}
|
||||
clear();
|
||||
if (!parameter.empty()) ptr = parameter.ptr->copy();
|
||||
if (!parameter.empty())
|
||||
ptr = parameter.ptr->copy();
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -279,7 +280,7 @@ private:
|
||||
return id == typeid(T);
|
||||
}
|
||||
Any* copy() const override {
|
||||
return new RealData {get()};
|
||||
return new RealData{get()};
|
||||
}
|
||||
|
||||
T& get() & {
|
||||
@ -291,14 +292,12 @@ private:
|
||||
}
|
||||
|
||||
template <class U>
|
||||
typename std::enable_if<!HasOperatorEqual<U>::value, bool>::type
|
||||
equal(const Any& left, const Any& rhs) const {
|
||||
typename std::enable_if<!HasOperatorEqual<U>::value, bool>::type equal(const Any& left, const Any& rhs) const {
|
||||
IE_THROW() << "Parameter doesn't contain equal operator";
|
||||
}
|
||||
|
||||
template <class U>
|
||||
typename std::enable_if<HasOperatorEqual<U>::value, bool>::type
|
||||
equal(const Any& left, const Any& rhs) const {
|
||||
typename std::enable_if<HasOperatorEqual<U>::value, bool>::type equal(const Any& left, const Any& rhs) const {
|
||||
return dyn_cast<U>(&left) == dyn_cast<U>(&rhs);
|
||||
}
|
||||
|
||||
@ -306,13 +305,11 @@ private:
|
||||
return rhs.is(typeid(T)) && equal<T>(*this, rhs);
|
||||
}
|
||||
|
||||
template <class U>
|
||||
typename std::enable_if<!HasOutputStreamOperator<U>::value, void>::type
|
||||
print(std::ostream& stream, const U& object) const {}
|
||||
template <class U, typename std::enable_if<!HasOutputStreamOperator<U>::value, bool>::type = true>
|
||||
void print(std::ostream& stream, const U& object) const {}
|
||||
|
||||
template <class U>
|
||||
typename std::enable_if<HasOutputStreamOperator<U>::value, void>::type
|
||||
print(std::ostream& stream, const U& object) const {
|
||||
template <class U, typename std::enable_if<HasOutputStreamOperator<U>::value, bool>::type = true>
|
||||
void print(std::ostream& stream, const U& object) const {
|
||||
stream << object;
|
||||
}
|
||||
|
||||
@ -323,13 +320,15 @@ private:
|
||||
|
||||
template <typename T>
|
||||
static T& dyn_cast(Any* obj) {
|
||||
if (obj == nullptr) IE_THROW() << "Parameter is empty!";
|
||||
if (obj == nullptr)
|
||||
IE_THROW() << "Parameter is empty!";
|
||||
return dynamic_cast<RealData<T>&>(*obj).get();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static const T& dyn_cast(const Any* obj) {
|
||||
if (obj == nullptr) IE_THROW() << "Parameter is empty!";
|
||||
if (obj == nullptr)
|
||||
IE_THROW() << "Parameter is empty!";
|
||||
return dynamic_cast<const RealData<T>&>(*obj).get();
|
||||
}
|
||||
|
||||
@ -338,7 +337,7 @@ private:
|
||||
|
||||
/**
|
||||
* @brief An std::map object containing parameters
|
||||
*/
|
||||
*/
|
||||
using ParamMap = std::map<std::string, Parameter>;
|
||||
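A small usage sketch of the type-erased Parameter and the ParamMap alias declared above; the as<T>() and operator== calls follow the public ie_parameter.hpp API and are illustrative only:

#include <string>
#include "ie_parameter.hpp"

using InferenceEngine::Parameter;
using InferenceEngine::ParamMap;

void parameter_example() {
    ParamMap params;
    params["DEVICE_ID"] = Parameter("0");      // built through the const char* ctor shown above
    params["NUM_REQUESTS"] = Parameter(4);     // any copy-constructible type is type-erased into RealData<T>
    int n = params["NUM_REQUESTS"].as<int>();  // typed access; dyn_cast throws if the stored type differs
    bool same = (params["DEVICE_ID"] == Parameter(std::string("0")));  // routed through equal<T>() above
    (void)n;
    (void)same;
}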
|
||||
#ifdef __ANDROID__
|
||||
|
@ -10,10 +10,10 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "ie_precision.hpp"
|
||||
|
||||
@ -37,12 +37,11 @@ namespace Metrics {
|
||||
#define EXEC_NETWORK_METRIC_KEY(name) METRIC_KEY(name)
|
||||
|
||||
#ifndef DECLARE_METRIC_KEY_IMPL
|
||||
#define DECLARE_METRIC_KEY(name, ...) \
|
||||
static constexpr auto METRIC_##name = #name
|
||||
# define DECLARE_METRIC_KEY(name, ...) static constexpr auto METRIC_##name = # name
|
||||
#else
|
||||
#define DECLARE_METRIC_KEY(name, ...) \
|
||||
static constexpr auto METRIC_##name = #name; \
|
||||
DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__)
|
||||
# define DECLARE_METRIC_KEY(name, ...) \
|
||||
static constexpr auto METRIC_##name = #name; \
|
||||
DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__)
|
||||
#endif
|
||||
|
||||
#define DECLARE_EXEC_NETWORK_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(name, __VA_ARGS__)
|
||||
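For context, a sketch of how this macro pair is normally consumed; Core::GetMetric and the FULL_DEVICE_NAME key come from the public IE API rather than from this hunk:

#include <string>
#include "ie_core.hpp"

// DECLARE_METRIC_KEY(FULL_DEVICE_NAME, std::string) defines METRIC_FULL_DEVICE_NAME = "FULL_DEVICE_NAME";
// METRIC_KEY(FULL_DEVICE_NAME) then resolves to that constant when a device is queried.
std::string device_full_name(InferenceEngine::Core& core) {
    return core.GetMetric("CPU", METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
}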
@ -51,7 +50,7 @@ namespace Metrics {
|
||||
* @def METRIC_VALUE(name)
|
||||
* @brief shortcut for defining metric values
|
||||
*/
|
||||
#define METRIC_VALUE(name) InferenceEngine::Metrics::name
|
||||
#define METRIC_VALUE(name) InferenceEngine::Metrics::name
|
||||
#define DECLARE_METRIC_VALUE(name) static constexpr auto name = #name
|
||||
|
||||
/**
|
||||
@ -162,9 +161,15 @@ enum class DeviceType {
|
||||
/** @cond INTERNAL */
|
||||
inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics::DeviceType& deviceType) {
|
||||
switch (deviceType) {
|
||||
case InferenceEngine::Metrics::DeviceType::discrete: os << "discrete"; break;
|
||||
case InferenceEngine::Metrics::DeviceType::integrated: os << "integrated"; break;
|
||||
default: os << "unknown"; break;
|
||||
case InferenceEngine::Metrics::DeviceType::discrete:
|
||||
os << "discrete";
|
||||
break;
|
||||
case InferenceEngine::Metrics::DeviceType::integrated:
|
||||
os << "integrated";
|
||||
break;
|
||||
default:
|
||||
os << "unknown";
|
||||
break;
|
||||
}
|
||||
|
||||
return os;
|
||||
@ -177,7 +182,8 @@ inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics
|
||||
DECLARE_METRIC_KEY(DEVICE_TYPE, DeviceType);
|
||||
|
||||
/**
|
||||
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device
|
||||
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by
|
||||
* specified device
|
||||
*/
|
||||
DECLARE_METRIC_KEY(DEVICE_GOPS, std::map<InferenceEngine::Precision, float>);
|
||||
|
||||
@ -212,15 +218,15 @@ namespace PluginConfigParams {
|
||||
* @def CONFIG_KEY(name)
|
||||
* @brief shortcut for defining configuration keys
|
||||
*/
|
||||
#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name)
|
||||
#define _CONFIG_KEY(name) KEY_##name
|
||||
#define CONFIG_KEY(name) InferenceEngine::PluginConfigParams::_CONFIG_KEY(name)
|
||||
#define _CONFIG_KEY(name) KEY_##name
|
||||
#define DECLARE_CONFIG_KEY(name) static constexpr auto _CONFIG_KEY(name) = #name
|
||||
|
||||
/**
|
||||
* @def CONFIG_VALUE(name)
|
||||
* @brief shortcut for defining configuration values
|
||||
*/
|
||||
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
|
||||
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
|
||||
#define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name
|
||||
|
||||
/**
|
||||
@ -239,13 +245,14 @@ DECLARE_CONFIG_KEY(CPU_THREADS_NUM);
|
||||
*
|
||||
* It is passed to Core::SetConfig(), this option should be used with values:
|
||||
* PluginConfigParams::NO (no pinning for CPU inference threads)
|
||||
* PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static benchmarks),
|
||||
* PluginConfigParams::YES, which is default on the conventional CPUs (pinning threads to cores, best for static
|
||||
* benchmarks),
|
||||
*
|
||||
* the following options are implemented only for the TBB as a threading option
|
||||
* PluginConfigParams::NUMA (pinning threads to NUMA nodes, best for real-life, contented cases)
|
||||
* on the Windows and MacOS* this option behaves as YES
|
||||
* PluginConfigParams::HYBRID_AWARE (let the runtime to do pinning to the cores types, e.g. prefer the "big" cores for latency tasks)
|
||||
* on the hybrid CPUs this option is default
|
||||
* PluginConfigParams::HYBRID_AWARE (let the runtime to do pinning to the cores types, e.g. prefer the "big" cores for
|
||||
* latency tasks) on the hybrid CPUs this option is default
|
||||
*
|
||||
* Also, the settings are ignored, if the OpenVINO compiled with OpenMP and any affinity-related OpenMP's
|
||||
* environment variable is set (as affinity is configured explicitly)
|
||||
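A configuration sketch matching the pinning options described above; Core::SetConfig is the public IE entry point, and the "CPU" device name is just the usual target for this key:

#include "ie_core.hpp"

// Pin inference threads to NUMA nodes instead of individual cores (TBB builds only, see the comment above).
void pin_to_numa(InferenceEngine::Core& core) {
    core.SetConfig({{CONFIG_KEY(CPU_BIND_THREAD), CONFIG_VALUE(NUMA)}}, "CPU");
}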
@ -313,12 +320,15 @@ DECLARE_CONFIG_KEY(CONFIG_FILE);
|
||||
DECLARE_CONFIG_KEY(LOG_LEVEL);
|
||||
|
||||
DECLARE_CONFIG_VALUE(LOG_NONE); // turn off logging
|
||||
DECLARE_CONFIG_VALUE(LOG_ERROR); // error events that might still allow the application to continue running
|
||||
DECLARE_CONFIG_VALUE(LOG_WARNING); // potentially harmful situations which may further lead to ERROR
|
||||
DECLARE_CONFIG_VALUE(
|
||||
LOG_INFO); // informational messages that display the progress of the application at coarse-grained level
|
||||
DECLARE_CONFIG_VALUE(LOG_DEBUG); // fine-grained events that are most useful to debug an application.
|
||||
DECLARE_CONFIG_VALUE(LOG_TRACE); // finer-grained informational events than the DEBUG
|
||||
DECLARE_CONFIG_VALUE(LOG_ERROR); // error events that might still allow the
|
||||
// application to continue running
|
||||
DECLARE_CONFIG_VALUE(LOG_WARNING); // potentially harmful situations which may
|
||||
// further lead to ERROR
|
||||
DECLARE_CONFIG_VALUE(LOG_INFO); // informational messages that display the progress of the
|
||||
// application at coarse-grained level
|
||||
DECLARE_CONFIG_VALUE(LOG_DEBUG); // fine-grained events that are most useful to
|
||||
// debug an application.
|
||||
DECLARE_CONFIG_VALUE(LOG_TRACE); // finer-grained informational events than the DEBUG
|
||||
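And the matching sketch for the log-level values listed above, again via Core::SetConfig; the device name is illustrative and support for the key is plugin-specific:

#include "ie_core.hpp"

void enable_warnings(InferenceEngine::Core& core) {
    // Ask the plugin to report warnings and everything more severe.
    core.SetConfig({{CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_WARNING)}}, "MYRIAD");
}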
|
||||
/**
|
||||
* @brief the key for setting of required device to execute on
|
||||
@ -349,7 +359,6 @@ DECLARE_CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS);
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::ExecutableNetwork::GetExecGraphInfo::serialize method")
|
||||
DECLARE_CONFIG_KEY(DUMP_EXEC_GRAPH_AS_DOT);
|
||||
|
||||
|
||||
/**
|
||||
* @brief The name for setting to execute in bfloat16 precision whenever it is possible
|
||||
*
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#include "ie_common.h"
|
||||
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
/**
|
||||
@ -230,10 +229,23 @@ public:
|
||||
static Precision FromStr(const std::string& str) {
|
||||
static const std::unordered_map<std::string, ePrecision> names = {
|
||||
#define PRECISION_NAME(s) {#s, s}
|
||||
PRECISION_NAME(Q78), PRECISION_NAME(BOOL), PRECISION_NAME(BF16),
|
||||
PRECISION_NAME(I4), PRECISION_NAME(I8), PRECISION_NAME(I16), PRECISION_NAME(I32), PRECISION_NAME(I64),
|
||||
PRECISION_NAME(U4), PRECISION_NAME(U8), PRECISION_NAME(U16), PRECISION_NAME(U32), PRECISION_NAME(U64),
|
||||
PRECISION_NAME(FP32), PRECISION_NAME(FP64), PRECISION_NAME(FP16), PRECISION_NAME(MIXED),
|
||||
PRECISION_NAME(Q78),
|
||||
PRECISION_NAME(BOOL),
|
||||
PRECISION_NAME(BF16),
|
||||
PRECISION_NAME(I4),
|
||||
PRECISION_NAME(I8),
|
||||
PRECISION_NAME(I16),
|
||||
PRECISION_NAME(I32),
|
||||
PRECISION_NAME(I64),
|
||||
PRECISION_NAME(U4),
|
||||
PRECISION_NAME(U8),
|
||||
PRECISION_NAME(U16),
|
||||
PRECISION_NAME(U32),
|
||||
PRECISION_NAME(U64),
|
||||
PRECISION_NAME(FP32),
|
||||
PRECISION_NAME(FP64),
|
||||
PRECISION_NAME(FP16),
|
||||
PRECISION_NAME(MIXED),
|
||||
PRECISION_NAME(BIN),
|
||||
#undef PRECISION_NAME
|
||||
};
|
||||
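A quick sketch of the lookup table above in use:

#include "ie_precision.hpp"

// FromStr maps the literal name to the ePrecision enumerator registered via PRECISION_NAME above.
void precision_roundtrip() {
    auto p = InferenceEngine::Precision::FromStr("FP16");
    bool ok = (p == InferenceEngine::Precision::FP16);  // p.name() would give back "FP16"
    (void)ok;
}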
@ -292,12 +304,15 @@ protected:
|
||||
* @returns True if strings are the same
|
||||
*/
|
||||
static bool areSameStrings(const char* l, const char* r) noexcept {
|
||||
if (l == r) return true;
|
||||
if (l == r)
|
||||
return true;
|
||||
|
||||
if (l == nullptr || r == nullptr) return false;
|
||||
if (l == nullptr || r == nullptr)
|
||||
return false;
|
||||
|
||||
for (; *l && *r; l++, r++) {
|
||||
if (*l != *r) return false;
|
||||
if (*l != *r)
|
||||
return false;
|
||||
}
|
||||
return *l == *r;
|
||||
}
|
||||
@ -366,7 +381,7 @@ struct PrecisionTrait<Precision::BF16> {
|
||||
using value_type = int16_t;
|
||||
enum { is_float = true };
|
||||
};
|
||||
template<>
|
||||
template <>
|
||||
struct PrecisionTrait<Precision::Q78> {
|
||||
using value_type = uint16_t;
|
||||
enum { is_float = false };
|
||||
@ -484,7 +499,8 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector<Precision>&
|
||||
}
|
||||
|
||||
inline constexpr uint32_t getPrecisionMask(
|
||||
InferenceEngine::Precision::ePrecision precision1, InferenceEngine::Precision::ePrecision precision2,
|
||||
InferenceEngine::Precision::ePrecision precision1,
|
||||
InferenceEngine::Precision::ePrecision precision2,
|
||||
InferenceEngine::Precision::ePrecision precision3 = InferenceEngine::Precision::MIXED,
|
||||
InferenceEngine::Precision::ePrecision precision4 = InferenceEngine::Precision::MIXED) {
|
||||
return (precision1) | (precision2 << 8) | (precision3 << 16) | (precision4 << 24);
|
||||
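Because getPrecisionMask stays constexpr, the usual pattern is to dispatch kernels on an input/output precision pair with ordinary case labels. A sketch (the dispatch() wrapper is illustrative; namespace placement assumed as in ie_precision.hpp):

#include "ie_precision.hpp"

using InferenceEngine::Precision;
using InferenceEngine::getPrecisionMask;

// Each precision occupies its own byte of the mask, so a pair (or up to four) selects a unique case label.
void dispatch(Precision::ePrecision in, Precision::ePrecision out) {
    switch (getPrecisionMask(in, out)) {
    case getPrecisionMask(Precision::FP32, Precision::FP32):
        /* FP32 -> FP32 kernel */
        break;
    case getPrecisionMask(Precision::U8, Precision::FP32):
        /* U8 -> FP32 kernel */
        break;
    default:
        break;
    }
}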
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief This header file provides structures to store info about pre-processing of network inputs (scale, mean image,
|
||||
* ...)
|
||||
* @brief This header file provides structures to store info about pre-processing of
|
||||
* network inputs (scale, mean image, ...)
|
||||
*
|
||||
* @file ie_preprocess.hpp
|
||||
*/
|
||||
@ -155,8 +155,7 @@ public:
|
||||
} else if (meanImage.get()->getTensorDesc().getDims().size() != 2) {
|
||||
IE_THROW() << "Failed to set invalid mean image for channel: number of dimensions != 2";
|
||||
} else if (channel >= _channelsInfo.size()) {
|
||||
IE_THROW() << "Channel " << channel
|
||||
<< " exceed number of PreProcess channels: " << _channelsInfo.size();
|
||||
IE_THROW() << "Channel " << channel << " exceed number of PreProcess channels: " << _channelsInfo.size();
|
||||
}
|
||||
_variant = MEAN_IMAGE;
|
||||
_channelsInfo[channel]->meanData = meanImage;
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
/**
|
||||
* @brief This is a header file for the IE RemoteContext and RemoteBlob classes
|
||||
*
|
||||
*
|
||||
* @file ie_remote_context.hpp
|
||||
*/
|
||||
#pragma once
|
||||
@ -44,7 +44,7 @@ public:
|
||||
* @brief Constructor. Creates an empty RemoteBlob object with the specified precision.
|
||||
* @param tensorDesc Defines the layout and dims of the blob
|
||||
*/
|
||||
explicit RemoteBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {}
|
||||
explicit RemoteBlob(const TensorDesc& tensorDesc) : MemoryBlob(tensorDesc) {}
|
||||
|
||||
/**
|
||||
* @brief Returns a map of device-specific parameters required for low-level
|
||||
@ -103,8 +103,8 @@ public:
|
||||
* @return true if this object can be dynamically cast to the type T*. Otherwise, false
|
||||
*/
|
||||
template <typename T,
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
bool is() noexcept {
|
||||
return dynamic_cast<T*>(this) != nullptr;
|
||||
}
|
||||
@ -116,8 +116,8 @@ public:
|
||||
* @return true if this object can be dynamically cast to the type const T*. Otherwise, false
|
||||
*/
|
||||
template <typename T,
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
bool is() const noexcept {
|
||||
return dynamic_cast<const T*>(this) != nullptr;
|
||||
}
|
||||
@ -129,9 +129,9 @@ public:
|
||||
* @return Raw pointer to the object of the type T or nullptr on error
|
||||
*/
|
||||
template <typename T,
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
T * as() noexcept {
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
T* as() noexcept {
|
||||
return dynamic_cast<T*>(this);
|
||||
}
|
||||
|
||||
@ -142,9 +142,9 @@ public:
|
||||
* @return Raw pointer to the object of the type const T or nullptr on error
|
||||
*/
|
||||
template <typename T,
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
const T * as() const noexcept {
|
||||
typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
|
||||
typename std::enable_if<std::is_base_of<RemoteContext, T>::value, int>::type = 0>
|
||||
const T* as() const noexcept {
|
||||
return dynamic_cast<const T*>(this);
|
||||
}
|
||||
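A sketch of how the is()/as() helpers above are used; gpu::ClContext is only an example of a derived remote-context type and depends on which plugin headers are included:

#include "gpu/gpu_context_api_ocl.hpp"
#include "ie_remote_context.hpp"

// Downcast a generic RemoteContext to a plugin-specific one without exceptions.
void inspect(InferenceEngine::RemoteContext* ctx) {
    if (ctx->is<InferenceEngine::gpu::ClContext>()) {
        auto* cl_ctx = ctx->as<InferenceEngine::gpu::ClContext>();
        // cl_ctx now exposes the OpenCL-specific accessors of the GPU plugin.
        (void)cl_ctx;
    }
}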
|
||||
@ -190,4 +190,3 @@ inline RemoteBlob::Ptr make_shared_blob(const TensorDesc& desc, RemoteContext::P
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ie_api.h>
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "ie_api.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -56,7 +56,6 @@ INFERENCE_ENGINE_DEPRECATED("This transformation will be removed in 2023.1. "
|
||||
"Use InferenceEngine::lowLatency2 instead.")
|
||||
INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
|
||||
|
||||
|
||||
/**
|
||||
* @brief The transformation finds all TensorIterator/Loop layers in the network,
|
||||
* processes all back edges that describe a connection between Result and Parameter
|
||||
@ -84,7 +83,6 @@ INFERENCE_ENGINE_API_CPP(void) LowLatency(InferenceEngine::CNNNetwork& network);
|
||||
If "false, then the transformation leaves existed initializing subgraph for ReadValue operation.
|
||||
* Loop operation by a given number. Does not affect TensorIterators.
|
||||
*/
|
||||
INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network,
|
||||
bool use_const_initializer = true);
|
||||
INFERENCE_ENGINE_API_CPP(void) lowLatency2(InferenceEngine::CNNNetwork& network, bool use_const_initializer = true);
|
||||
|
||||
} // namespace InferenceEngine
|
||||
} // namespace InferenceEngine
|
||||
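A usage sketch of the transformation declared above; Core::ReadNetwork/LoadNetwork are the standard IE entry points, and the file name and device are placeholders:

#include "ie_core.hpp"
#include "ie_transformations.hpp"

void apply_low_latency(InferenceEngine::Core& core) {
    InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml");
    InferenceEngine::lowLatency2(network);  // default use_const_initializer = true, as declared above
    auto exec = core.LoadNetwork(network, "CPU");
    (void)exec;
}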
|
@ -44,9 +44,9 @@ struct Version {
|
||||
*/
|
||||
struct ApiVersion {
|
||||
INFERENCE_ENGINE_DEPRECATED("Use IE_VERSION_[MAJOR|MINOR|PATCH] definitions, buildNumber property")
|
||||
int major; //!< A major version
|
||||
int major; //!< A major version
|
||||
INFERENCE_ENGINE_DEPRECATED("Use IE_VERSION_[MAJOR|MINOR|PATCH] definitions, buildNumber property")
|
||||
int minor; //!< A minor version
|
||||
int minor; //!< A minor version
|
||||
|
||||
/**
|
||||
* @brief A default construtor
|
||||
@ -60,7 +60,7 @@ struct Version {
|
||||
* @brief A default construtor
|
||||
* @param v A version to copy
|
||||
*/
|
||||
ApiVersion(const ApiVersion & v) {
|
||||
ApiVersion(const ApiVersion& v) {
|
||||
major = v.major;
|
||||
minor = v.minor;
|
||||
}
|
||||
|
@ -8,9 +8,9 @@
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "ie_transformations.hpp"
|
||||
#include "ie_compound_blob.h"
|
||||
#include "ie_core.hpp"
|
||||
#include "ie_transformations.hpp"
|
||||
|
||||
// remove in 2022.1 major release
|
||||
#include <iostream>
|
||||
|
@ -26,7 +26,7 @@ namespace MultiDeviceConfigParams {
|
||||
*/
|
||||
#define MULTI_CONFIG_KEY(name) InferenceEngine::MultiDeviceConfigParams::_CONFIG_KEY(MULTI_##name)
|
||||
|
||||
#define DECLARE_MULTI_CONFIG_KEY(name) DECLARE_CONFIG_KEY(MULTI_##name)
|
||||
#define DECLARE_MULTI_CONFIG_KEY(name) DECLARE_CONFIG_KEY(MULTI_##name)
|
||||
#define DECLARE_MULTI_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(MULTI_##name)
|
||||
|
||||
/**
|
||||
|
@ -18,68 +18,68 @@ namespace InferenceEngine {
|
||||
namespace Metrics {
|
||||
|
||||
/**
|
||||
* @brief Metric to get a int of the device number, String value is METRIC_HDDL_DEVICE_NUM
|
||||
*/
|
||||
* @brief Metric to get a int of the device number, String value is METRIC_HDDL_DEVICE_NUM
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_NUM, int);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<std::string> of device names, String value is METRIC_HDDL_DEVICE_NAME
|
||||
*/
|
||||
* @brief Metric to get a std::vector<std::string> of device names, String value is METRIC_HDDL_DEVICE_NAME
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_NAME, std::vector<std::string>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<float> of device thermal, String value is METRIC_HDDL_DEVICE_THERMAL
|
||||
*/
|
||||
* @brief Metric to get a std::vector<float> of device thermal, String value is METRIC_HDDL_DEVICE_THERMAL
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_THERMAL, std::vector<float>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<uint32> of device ids, String value is METRIC_HDDL_DEVICE_ID
|
||||
*/
|
||||
* @brief Metric to get a std::vector<uint32> of device ids, String value is METRIC_HDDL_DEVICE_ID
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_ID, std::vector<unsigned int>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<int> of device subclasses, String value is METRIC_HDDL_DEVICE_SUBCLASS
|
||||
*/
|
||||
* @brief Metric to get a std::vector<int> of device subclasses, String value is METRIC_HDDL_DEVICE_SUBCLASS
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_SUBCLASS, std::vector<int>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<uint32> of device total memory, String value is METRIC_HDDL_MEMORY_TOTAL
|
||||
*/
|
||||
* @brief Metric to get a std::vector<uint32> of device total memory, String value is METRIC_HDDL_MEMORY_TOTAL
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_MEMORY_TOTAL, std::vector<unsigned int>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<uint32> of device used memory, String value is METRIC_HDDL_DEVICE_MEMORY_USED
|
||||
*/
|
||||
* @brief Metric to get a std::vector<uint32> of device used memory, String value is METRIC_HDDL_DEVICE_MEMORY_USED
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_MEMORY_USED, std::vector<unsigned int>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<float> of device utilization, String value is METRIC_HDDL_DEVICE_UTILIZATION
|
||||
*/
|
||||
* @brief Metric to get a std::vector<float> of device utilization, String value is METRIC_HDDL_DEVICE_UTILIZATION
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_UTILIZATION, std::vector<float>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<std::string> of stream ids, String value is METRIC_HDDL_DEVICE_STREAM_ID
|
||||
*/
|
||||
* @brief Metric to get a std::vector<std::string> of stream ids, String value is METRIC_HDDL_DEVICE_STREAM_ID
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_STREAM_ID, std::vector<std::string>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<std::string> of device tags, String value is METRIC_HDDL_DEVICE_TAG
|
||||
*/
|
||||
* @brief Metric to get a std::vector<std::string> of device tags, String value is METRIC_HDDL_DEVICE_TAG
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_TAG, std::vector<std::string>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a std::vector<int> of group ids, String value is METRIC_HDDL_GROUP_ID
|
||||
*/
|
||||
* @brief Metric to get a std::vector<int> of group ids, String value is METRIC_HDDL_GROUP_ID
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_GROUP_ID, std::vector<int>);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a int number of device be using for group, String value is METRIC_HDDL_DEVICE_GROUP_USING_NUM
|
||||
*/
|
||||
* @brief Metric to get a int number of device be using for group, String value is METRIC_HDDL_DEVICE_GROUP_USING_NUM
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_GROUP_USING_NUM, int);
|
||||
|
||||
/**
|
||||
* @brief Metric to get a int number of total device, String value is METRIC_HDDL_DEVICE_TOTAL_NUM
|
||||
*/
|
||||
* @brief Metric to get a int number of total device, String value is METRIC_HDDL_DEVICE_TOTAL_NUM
|
||||
*/
|
||||
DECLARE_METRIC_KEY(HDDL_DEVICE_TOTAL_NUM, int);
|
||||
|
||||
} // namespace Metrics
|
||||
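These keys are read back through the generic metric mechanism. A sketch; the "HDDL" device name and Core::GetMetric come from the public IE API, not from this hunk:

#include <vector>
#include "ie_core.hpp"

void query_hddl(InferenceEngine::Core& core) {
    int device_num = core.GetMetric("HDDL", METRIC_KEY(HDDL_DEVICE_NUM)).as<int>();
    auto thermal = core.GetMetric("HDDL", METRIC_KEY(HDDL_DEVICE_THERMAL)).as<std::vector<float>>();
    (void)device_num;
    (void)thermal;
}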
@ -151,8 +151,9 @@ DECLARE_VPU_CONFIG(HDDL_BIND_DEVICE);
|
||||
* @brief [Only for HDDLPlugin]
|
||||
* Type: A signed int wrapped in a string, default is "0".
|
||||
* This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "False".
|
||||
* When there are multiple devices running a certain network (a same network running on multiple devices in Bypass Scheduler),
|
||||
* the device with a larger number has a higher priority, and more inference tasks will be fed to it with priority.
|
||||
* When there are multiple devices running a certain network (a same network running on multiple devices in Bypass
|
||||
* Scheduler), the device with a larger number has a higher priority, and more inference tasks will be fed to it with
|
||||
* priority.
|
||||
*/
|
||||
DECLARE_VPU_CONFIG(HDDL_RUNTIME_PRIORITY);
|
||||
|
||||
@ -171,7 +172,7 @@ DECLARE_VPU_CONFIG(HDDL_USE_SGAD);
|
||||
* This config gives a "group id" for a certain device when this device has been reserved for certain client, client
|
||||
* can use this device grouped by calling this group id while other client can't use this device
|
||||
* Each device has their own group id. Device in one group shares same group id.
|
||||
*/
|
||||
*/
|
||||
DECLARE_VPU_CONFIG(HDDL_GROUP_DEVICE);
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -30,7 +30,7 @@
|
||||
*/
|
||||
#define VPU_HDDL_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_HDDL_##name
|
||||
|
||||
#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name)
|
||||
#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name)
|
||||
#define DECLARE_VPU_HDDL_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_HDDL_##name)
|
||||
|
||||
//
|
||||
@ -41,101 +41,102 @@
|
||||
* @def VPU_HDDL_METRIC(name)
|
||||
* @brief Shortcut for defining VPU HDDL metric
|
||||
*/
|
||||
#define VPU_HDDL_METRIC(name) METRIC_KEY(VPU_HDDL_##name)
|
||||
#define DECLARE_VPU_HDDL_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_HDDL_##name, __VA_ARGS__)
|
||||
#define VPU_HDDL_METRIC(name) METRIC_KEY(VPU_HDDL_##name)
|
||||
#define DECLARE_VPU_HDDL_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_HDDL_##name, __VA_ARGS__)
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
namespace Metrics {
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NUM instead
|
||||
* @brief Metric to get a int of the device number, String value is METRIC_VPU_HDDL_DEVICE_NUM
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NUM instead
|
||||
* @brief Metric to get a int of the device number, String value is METRIC_VPU_HDDL_DEVICE_NUM
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_NUM instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_NUM, int);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NAME instead
|
||||
* @brief Metric to get a std::vector<std::string> of device names, String value is METRIC_VPU_HDDL_DEVICE_NAME
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_NAME instead
|
||||
* @brief Metric to get a std::vector<std::string> of device names, String value is METRIC_VPU_HDDL_DEVICE_NAME
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_NAME instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_NAME, std::vector<std::string>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_THERMAL instead
|
||||
* @brief Metric to get a std::vector<float> of device thermal, String value is METRIC_VPU_HDDL_DEVICE_THERMAL
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_THERMAL instead
|
||||
* @brief Metric to get a std::vector<float> of device thermal, String value is METRIC_VPU_HDDL_DEVICE_THERMAL
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_THERMAL instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_THERMAL, std::vector<float>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_ID instead
|
||||
* @brief Metric to get a std::vector<uint32> of device ids, String value is METRIC_VPU_HDDL_DEVICE_ID
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_ID instead
|
||||
* @brief Metric to get a std::vector<uint32> of device ids, String value is METRIC_VPU_HDDL_DEVICE_ID
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_ID instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_ID, std::vector<unsigned int>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_SUBCLASS instead
|
||||
* @brief Metric to get a std::vector<int> of device subclasses, String value is METRIC_VPU_HDDL_DEVICE_SUBCLASS
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_SUBCLASS instead
|
||||
* @brief Metric to get a std::vector<int> of device subclasses, String value is METRIC_VPU_HDDL_DEVICE_SUBCLASS
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_SUBCLASS instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_SUBCLASS, std::vector<int>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_TOTAL instead
|
||||
* @brief Metric to get a std::vector<uint32> of device total memory, String value is METRIC_VPU_HDDL_MEMORY_TOTAL
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_TOTAL instead
|
||||
* @brief Metric to get a std::vector<uint32> of device total memory, String value is METRIC_VPU_HDDL_MEMORY_TOTAL
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_TOTAL instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_TOTAL, std::vector<unsigned int>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_USED instead
|
||||
* @brief Metric to get a std::vector<uint32> of device used memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_USED
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_USED instead
|
||||
* @brief Metric to get a std::vector<uint32> of device used memory, String value is METRIC_VPU_HDDL_DEVICE_MEMORY_USED
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_MEMORY_USED instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_MEMORY_USED, std::vector<unsigned int>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_UTILIZATION instead
|
||||
* @brief Metric to get a std::vector<float> of device utilization, String value is METRIC_VPU_HDDL_DEVICE_UTILIZATION
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_UTILIZATION instead
|
||||
* @brief Metric to get a std::vector<float> of device utilization, String value is METRIC_VPU_HDDL_DEVICE_UTILIZATION
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_UTILIZATION instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_UTILIZATION, std::vector<float>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_STREAM_ID instead
|
||||
* @brief Metric to get a std::vector<std::string> of stream ids, String value is METRIC_VPU_HDDL_DEVICE_STREAM_ID
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_STREAM_ID instead
|
||||
* @brief Metric to get a std::vector<std::string> of stream ids, String value is METRIC_VPU_HDDL_DEVICE_STREAM_ID
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_STREAM_ID instead")
|
||||
DECLARE_VPU_HDDL_METRIC(STREAM_ID, std::vector<std::string>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TAG instead
|
||||
* @brief Metric to get a std::vector<std::string> of device tags, String value is METRIC_VPU_HDDL_DEVICE_TAG
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TAG instead
|
||||
* @brief Metric to get a std::vector<std::string> of device tags, String value is METRIC_VPU_HDDL_DEVICE_TAG
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_TAG instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_TAG, std::vector<std::string>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_GROUP_ID instead
|
||||
* @brief Metric to get a std::vector<int> of group ids, String value is METRIC_VPU_HDDL_GROUP_ID
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_GROUP_ID instead
|
||||
* @brief Metric to get a std::vector<int> of group ids, String value is METRIC_VPU_HDDL_GROUP_ID
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_GROUP_ID instead")
|
||||
DECLARE_VPU_HDDL_METRIC(GROUP_ID, std::vector<int>);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_GROUP_USING_NUM instead
|
||||
* @brief Metric to get a int number of device be using for group, String value is METRIC_VPU_HDDL_DEVICE_GROUP_USING_NUM
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_GROUP_USING_NUM instead
|
||||
* @brief Metric to get a int number of device be using for group, String value is
|
||||
* METRIC_VPU_HDDL_DEVICE_GROUP_USING_NUM
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_GROUP_USING_NUM instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_GROUP_USING_NUM, int);
|
||||
|
||||
/**
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TOTAL_NUM instead
|
||||
* @brief Metric to get a int number of total device, String value is METRIC_VPU_HDDL_DEVICE_TOTAL_NUM
|
||||
*/
|
||||
* @deprecated Use InferenceEngine::METRIC_HDDL_DEVICE_TOTAL_NUM instead
|
||||
* @brief Metric to get a int number of total device, String value is METRIC_VPU_HDDL_DEVICE_TOTAL_NUM
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::METRIC_HDDL_DEVICE_TOTAL_NUM instead")
|
||||
DECLARE_VPU_HDDL_METRIC(DEVICE_TOTAL_NUM, int);
|
||||
|
||||
@ -219,8 +220,9 @@ DECLARE_VPU_HDDL_CONFIG_KEY(BIND_DEVICE);
|
||||
* @brief [Only for HDDLPlugin]
|
||||
* Type: A signed int wrapped in a string, default is "0".
|
||||
* This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "False".
|
||||
* When there are multiple devices running a certain network (a same network running on multiple devices in Bypass Scheduler),
|
||||
* the device with a larger number has a higher priority, and more inference tasks will be fed to it with priority.
|
||||
* When there are multiple devices running a certain network (a same network running on multiple devices in Bypass
|
||||
* Scheduler), the device with a larger number has a higher priority, and more inference tasks will be fed to it with
|
||||
* priority.
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InferenceEngine::HDDL_RUNTIME_PRIORITY instead")
|
||||
DECLARE_VPU_HDDL_CONFIG_KEY(RUNTIME_PRIORITY);
|
||||
|
@ -26,7 +26,7 @@
|
||||
*/
|
||||
#define VPU_MYRIAD_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_MYRIAD_##name
|
||||
|
||||
#define DECLARE_VPU_MYRIAD_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_MYRIAD_##name)
|
||||
#define DECLARE_VPU_MYRIAD_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_MYRIAD_##name)
|
||||
#define DECLARE_VPU_MYRIAD_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_MYRIAD_##name)
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
@ -12,11 +12,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_api.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "ie_api.h"
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
#define DECLARE_VPU_CONFIG(name) static constexpr auto name = #name
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
@ -12,26 +12,26 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "vpu/myriad_plugin_config.hpp"
|
||||
#include "vpu/hddl_plugin_config.hpp"
|
||||
#include "ie_api.h"
|
||||
#include "vpu/hddl_plugin_config.hpp"
|
||||
#include "vpu/myriad_plugin_config.hpp"
|
||||
|
||||
//
|
||||
// Common options
|
||||
//
|
||||
|
||||
#define VPU_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_##name)
|
||||
#define VPU_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_##name)
|
||||
#define VPU_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_##name
|
||||
|
||||
#define DECLARE_VPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_##name)
|
||||
#define DECLARE_VPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_##name)
|
||||
#define DECLARE_VPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_##name)
|
||||
|
||||
//
|
||||
// Common metrics
|
||||
//
|
||||
|
||||
#define VPU_METRIC(name) METRIC_KEY(VPU_##name)
|
||||
#define DECLARE_VPU_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_##name, __VA_ARGS__)
|
||||
#define VPU_METRIC(name) METRIC_KEY(VPU_##name)
|
||||
#define DECLARE_VPU_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_##name, __VA_ARGS__)
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -77,8 +77,8 @@ DECLARE_VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME);
|
||||
* VPU_CONFIG_VALUE(NCHW) executable network forced to use NCHW input/output layouts
|
||||
* VPU_CONFIG_VALUE(NHWC) executable network forced to use NHWC input/output layouts
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InputInfo::setLayout on input data from CNNNetwork::getInputsInfo() or" \
|
||||
"Data::setLayout on output data from CNNNetwork::getOutputsInfo()")
|
||||
INFERENCE_ENGINE_DEPRECATED("Use InputInfo::setLayout on input data from CNNNetwork::getInputsInfo() or"
|
||||
"Data::setLayout on output data from CNNNetwork::getOutputsInfo()")
|
||||
DECLARE_VPU_CONFIG_KEY(COMPUTE_LAYOUT);
|
||||
|
||||
/**
|
||||
|
@ -15,9 +15,9 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "ie_version.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "cpp/ie_executable_network.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_version.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
class Function;
|
||||
@ -101,7 +101,8 @@ public:
|
||||
* constant data becomes to point to invalid memory.
|
||||
* @return Function
|
||||
*/
|
||||
std::shared_ptr<ngraph::Function> read_model(const std::string& model, const std::shared_ptr<const InferenceEngine::Blob>& weights) const;
|
||||
std::shared_ptr<ngraph::Function> read_model(const std::string& model,
|
||||
const std::shared_ptr<const InferenceEngine::Blob>& weights) const;
|
||||
|
||||
/**
|
||||
* @brief Creates an executable network from a network object.
|
||||
@ -115,9 +116,9 @@ public:
|
||||
* operation
|
||||
* @return An executable network reference
|
||||
*/
|
||||
InferenceEngine::ExecutableNetwork compile_model(
|
||||
const std::shared_ptr<const ngraph::Function>& network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Reads model and creates an executable network from IR or ONNX file
|
||||
@ -132,9 +133,9 @@ public:
|
||||
*
|
||||
* @return An executable network reference
|
||||
*/
|
||||
InferenceEngine::ExecutableNetwork compile_model(
|
||||
const std::string& modelPath, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
InferenceEngine::ExecutableNetwork compile_model(const std::string& modelPath,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Creates an executable network from a network object within a specified remote context.
|
||||
@ -144,9 +145,9 @@ public:
|
||||
* operation
|
||||
* @return An executable network object
|
||||
*/
|
||||
InferenceEngine::ExecutableNetwork compile_model(
|
||||
const std::shared_ptr<const ngraph::Function>& network, const std::shared_ptr<InferenceEngine::RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
InferenceEngine::ExecutableNetwork compile_model(const std::shared_ptr<const ngraph::Function>& network,
|
||||
const std::shared_ptr<InferenceEngine::RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Registers extension
|
||||
@ -162,8 +163,9 @@ public:
|
||||
* operation*
|
||||
* @return An executable network reference
|
||||
*/
|
||||
InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Creates an executable network from a previously exported network within a specified
|
||||
@ -176,8 +178,8 @@ public:
|
||||
* @return An executable network reference
|
||||
*/
|
||||
InferenceEngine::ExecutableNetwork import_model(std::istream& networkModel,
|
||||
const std::shared_ptr<InferenceEngine::RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
const std::shared_ptr<InferenceEngine::RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config = {});
|
||||
|
||||
/**
|
||||
* @brief Query device if it supports specified network with specified configuration
|
||||
@ -187,9 +189,9 @@ public:
|
||||
* @param config Optional map of pairs: (config parameter name, config parameter value)
|
||||
* @return An object containing a map of pairs a layer name -> a device name supporting this layer.
|
||||
*/
|
||||
InferenceEngine::QueryNetworkResult query_model(
|
||||
const std::shared_ptr<const ngraph::Function>& network, const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {}) const;
|
||||
InferenceEngine::QueryNetworkResult query_model(const std::shared_ptr<const ngraph::Function>& network,
|
||||
const std::string& deviceName,
|
||||
const std::map<std::string, std::string>& config = {}) const;
|
||||
|
||||
/**
|
||||
* @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "ie_system_conf.h"
|
||||
#ifdef HAVE_SSE
|
||||
#include "cpu_x86_sse42/blob_transform_sse42.hpp"
|
||||
# include "cpu_x86_sse42/blob_transform_sse42.hpp"
|
||||
#endif
|
||||
|
||||
#include <cstdint>
|
@ -53,16 +53,30 @@ static void blob_copy_4d_t(Blob::Ptr src, Blob::Ptr dst) {
if (src->getTensorDesc().getLayout() == NHWC && dst->getTensorDesc().getLayout() == NCHW && C == 3 &&
C_src_stride == 1 && W_src_stride == 3 && W_dst_stride == 1 && with_cpu_x86_sse42()) {
if (PRC == Precision::U8) {
blob_copy_4d_split_u8c3(reinterpret_cast<const uint8_t*>(src_ptr), reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride, H_src_stride, N_dst_stride, H_dst_stride, C_dst_stride,
static_cast<int>(N), static_cast<int>(H), static_cast<int>(W));
blob_copy_4d_split_u8c3(reinterpret_cast<const uint8_t*>(src_ptr),
reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride,
H_src_stride,
N_dst_stride,
H_dst_stride,
C_dst_stride,
static_cast<int>(N),
static_cast<int>(H),
static_cast<int>(W));
return;
}

if (PRC == Precision::FP32) {
blob_copy_4d_split_f32c3(reinterpret_cast<const float*>(src_ptr), reinterpret_cast<float*>(dst_ptr),
N_src_stride, H_src_stride, N_dst_stride, H_dst_stride, C_dst_stride,
static_cast<int>(N), static_cast<int>(H), static_cast<int>(W));
blob_copy_4d_split_f32c3(reinterpret_cast<const float*>(src_ptr),
reinterpret_cast<float*>(dst_ptr),
N_src_stride,
H_src_stride,
N_dst_stride,
H_dst_stride,
C_dst_stride,
static_cast<int>(N),
static_cast<int>(H),
static_cast<int>(W));
return;
}
}
@ -70,16 +84,30 @@ static void blob_copy_4d_t(Blob::Ptr src, Blob::Ptr dst) {
if (src->getTensorDesc().getLayout() == NCHW && dst->getTensorDesc().getLayout() == NHWC && C == 3 &&
C_dst_stride == 1 && W_dst_stride == 3 && W_src_stride == 1 && with_cpu_x86_sse42()) {
if (PRC == Precision::U8) {
blob_copy_4d_merge_u8c3(reinterpret_cast<const uint8_t*>(src_ptr), reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride, H_src_stride, C_src_stride, N_dst_stride, H_dst_stride,
static_cast<int>(N), static_cast<int>(H), static_cast<int>(W));
blob_copy_4d_merge_u8c3(reinterpret_cast<const uint8_t*>(src_ptr),
reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride,
H_src_stride,
C_src_stride,
N_dst_stride,
H_dst_stride,
static_cast<int>(N),
static_cast<int>(H),
static_cast<int>(W));
return;
}

if (PRC == Precision::FP32) {
blob_copy_4d_merge_f32c3(reinterpret_cast<const float*>(src_ptr), reinterpret_cast<float*>(dst_ptr),
N_src_stride, H_src_stride, C_src_stride, N_dst_stride, H_dst_stride,
static_cast<int>(N), static_cast<int>(H), static_cast<int>(W));
blob_copy_4d_merge_f32c3(reinterpret_cast<const float*>(src_ptr),
reinterpret_cast<float*>(dst_ptr),
N_src_stride,
H_src_stride,
C_src_stride,
N_dst_stride,
H_dst_stride,
static_cast<int>(N),
static_cast<int>(H),
static_cast<int>(W));
return;
}
}
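Call sites are rewritten the same way as declarations: once a call overflows 120 columns, the arguments are no longer bin-packed and each one lands on its own line, aligned to the opening parenthesis (consistent with `BinPackArguments: false` and `AllowAllArgumentsOnNextLine: false`). A hypothetical before/after sketch; `copy_plane` is illustrative and not an IE symbol:

```cpp
#include <cstddef>
#include <cstdint>

// Illustrative declaration only, used to show how call arguments wrap.
void copy_plane(const uint8_t* src, uint8_t* dst, size_t n_stride, size_t h_stride, int n, int h, int w);

void example(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t n_stride, size_t h_stride) {
    const size_t N = 1, H = 2, W = 3;

    // Still within the 120-column limit: the call stays on one line.
    copy_plane(src_ptr, dst_ptr, n_stride, h_stride, 1, 2, 3);

    // Over the limit: with BinPackArguments: false every argument gets its own
    // line, aligned to the opening parenthesis, as in the hunks above.
    copy_plane(reinterpret_cast<const uint8_t*>(src_ptr),
               reinterpret_cast<uint8_t*>(dst_ptr),
               n_stride,
               h_stride,
               static_cast<int>(N),
               static_cast<int>(H),
               static_cast<int>(W));
}
```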
@ -186,17 +214,35 @@ static void blob_copy_5d_t(Blob::Ptr src, Blob::Ptr dst) {
if (src->getTensorDesc().getLayout() == NDHWC && dst->getTensorDesc().getLayout() == NCDHW && C == 3 &&
C_src_stride == 1 && W_src_stride == 3 && W_dst_stride == 1 && with_cpu_x86_sse42()) {
if (PRC == Precision::U8) {
blob_copy_5d_split_u8c3(reinterpret_cast<const uint8_t*>(src_ptr), reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride, D_src_stride, H_src_stride, N_dst_stride, D_dst_stride, H_dst_stride,
C_dst_stride, static_cast<int>(N), static_cast<int>(D), static_cast<int>(H),
blob_copy_5d_split_u8c3(reinterpret_cast<const uint8_t*>(src_ptr),
reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride,
D_src_stride,
H_src_stride,
N_dst_stride,
D_dst_stride,
H_dst_stride,
C_dst_stride,
static_cast<int>(N),
static_cast<int>(D),
static_cast<int>(H),
static_cast<int>(W));
return;
}

if (PRC == Precision::FP32) {
blob_copy_5d_split_f32c3(reinterpret_cast<const float*>(src_ptr), reinterpret_cast<float*>(dst_ptr),
N_src_stride, D_src_stride, H_src_stride, N_dst_stride, D_dst_stride, H_dst_stride,
C_dst_stride, static_cast<int>(N), static_cast<int>(D), static_cast<int>(H),
blob_copy_5d_split_f32c3(reinterpret_cast<const float*>(src_ptr),
reinterpret_cast<float*>(dst_ptr),
N_src_stride,
D_src_stride,
H_src_stride,
N_dst_stride,
D_dst_stride,
H_dst_stride,
C_dst_stride,
static_cast<int>(N),
static_cast<int>(D),
static_cast<int>(H),
static_cast<int>(W));
return;
}
@ -205,17 +251,35 @@ static void blob_copy_5d_t(Blob::Ptr src, Blob::Ptr dst) {
if (src->getTensorDesc().getLayout() == NCDHW && dst->getTensorDesc().getLayout() == NDHWC && C == 3 &&
C_dst_stride == 1 && W_dst_stride == 3 && W_src_stride == 1 && with_cpu_x86_sse42()) {
if (PRC == Precision::U8) {
blob_copy_5d_merge_u8c3(reinterpret_cast<const uint8_t*>(src_ptr), reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride, D_src_stride, H_src_stride, C_src_stride, N_dst_stride, D_dst_stride,
H_dst_stride, static_cast<int>(N), static_cast<int>(D), static_cast<int>(H),
blob_copy_5d_merge_u8c3(reinterpret_cast<const uint8_t*>(src_ptr),
reinterpret_cast<uint8_t*>(dst_ptr),
N_src_stride,
D_src_stride,
H_src_stride,
C_src_stride,
N_dst_stride,
D_dst_stride,
H_dst_stride,
static_cast<int>(N),
static_cast<int>(D),
static_cast<int>(H),
static_cast<int>(W));
return;
}

if (PRC == Precision::FP32) {
blob_copy_5d_merge_f32c3(reinterpret_cast<const float*>(src_ptr), reinterpret_cast<float*>(dst_ptr),
N_src_stride, D_src_stride, H_src_stride, C_src_stride, N_dst_stride, D_dst_stride,
H_dst_stride, static_cast<int>(N), static_cast<int>(D), static_cast<int>(H),
blob_copy_5d_merge_f32c3(reinterpret_cast<const float*>(src_ptr),
reinterpret_cast<float*>(dst_ptr),
N_src_stride,
D_src_stride,
H_src_stride,
C_src_stride,
N_dst_stride,
D_dst_stride,
H_dst_stride,
static_cast<int>(N),
static_cast<int>(D),
static_cast<int>(H),
static_cast<int>(W));
return;
}
@ -289,13 +353,15 @@ static inline void blob_copy_5d(Blob::Ptr src, Blob::Ptr dst) {
}

void blob_copy(Blob::Ptr src, Blob::Ptr dst) {
if (src->buffer() == nullptr) IE_THROW() << "Cannot copy blob data. Source is not allocated.";
if (src->buffer() == nullptr)
IE_THROW() << "Cannot copy blob data. Source is not allocated.";

if (dst->buffer() == nullptr) IE_THROW() << "Cannot copy blob data. Destination is not allocated.";
if (dst->buffer() == nullptr)
IE_THROW() << "Cannot copy blob data. Destination is not allocated.";

if (src->getTensorDesc().getPrecision() != dst->getTensorDesc().getPrecision())
IE_THROW() << "Unimplemented blob transformation from precision " << src->getTensorDesc().getPrecision()
<< " to " << src->getTensorDesc().getPrecision();
<< " to " << src->getTensorDesc().getPrecision();

if (src->getTensorDesc().getDims() != dst->getTensorDesc().getDims())
IE_THROW() << "Unimplemented blob transformation from different shapes ";

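In `blob_copy` the one-line guard clauses are split because the style no longer allows short `if` statements on a single line: the body always moves to its own line, even when the combined form would fit. A hedged sketch of the same rule outside the IE sources, with a hypothetical helper name:

```cpp
#include <stdexcept>

// Hypothetical helper, shown only to illustrate AllowShortIfStatementsOnASingleLine: Never.
void require_allocated(const void* buffer) {
    // Previously this could be written as
    //     if (buffer == nullptr) throw std::runtime_error("not allocated");
    // on one line; under the new style the body is always placed on the next line.
    if (buffer == nullptr)
        throw std::runtime_error("Cannot copy blob data. Buffer is not allocated.");
}
```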
@ -4,43 +4,39 @@
|
||||
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ie_common.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <ie_memcpy.h>
|
||||
#include <blob_factory.hpp>
|
||||
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/graph_util.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/smart_reshape/set_batch_size.hpp>
|
||||
#include <transformations/smart_reshape/smart_reshape.hpp>
|
||||
#include "blob_factory.hpp"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "ie_common.h"
|
||||
#include "ie_memcpy.h"
|
||||
#include "ngraph/graph_util.hpp"
|
||||
#include "ngraph/ngraph.hpp"
|
||||
#include "ngraph/pass/constant_folding.hpp"
|
||||
#include "ngraph/pass/manager.hpp"
|
||||
#include "transformations/serialize.hpp"
|
||||
#include "transformations/smart_reshape/set_batch_size.hpp"
|
||||
#include "transformations/smart_reshape/smart_reshape.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
// TODO: remove this pass usage
|
||||
#include <legacy/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.hpp>
|
||||
#include <legacy/transformations/convert_opset1_to_legacy/convert_nms_5_to_legacy.hpp>
|
||||
|
||||
#include <legacy/transformations/convert_opset1_to_legacy/convert_one_hot_to_one_hot_ie.hpp>
|
||||
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
|
||||
|
||||
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
|
||||
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
|
||||
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
|
||||
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
#include "exec_graph_info.hpp"
|
||||
#include "ie_itt.hpp"
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace InferenceEngine;
|
||||
@ -48,7 +44,8 @@ using details::CNNNetworkNGraphImpl;
|
||||
using InferenceEngine::details::CNNNetworkNGraphImpl;
|
||||
using ngraph::Function;
|
||||
|
||||
void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::Node>& output, const std::string& outName,
|
||||
void CNNNetworkNGraphImpl::createDataForResult(const ::ngraph::Output<::ngraph::Node>& output,
|
||||
const std::string& outName,
|
||||
DataPtr& ptr) {
|
||||
const auto isCompatible = [](size_t size, const Layout& l) -> bool {
|
||||
switch (size) {
|
||||
@ -111,10 +108,10 @@ void CNNNetworkNGraphImpl::validateFunctionNames() const {
|
||||
}
|
||||
}
|
||||
|
||||
CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(
|
||||
const std::shared_ptr<Function>& nGraph,
|
||||
const std::vector<IExtensionPtr>& exts)
|
||||
: _ngraph_function(nGraph), _ie_extensions(exts) {
|
||||
CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(const std::shared_ptr<Function>& nGraph,
|
||||
const std::vector<IExtensionPtr>& exts)
|
||||
: _ngraph_function(nGraph),
|
||||
_ie_extensions(exts) {
|
||||
// Restore usual attributes for CNNNetwork
|
||||
auto keep_input_info = [](CNNNetworkNGraphImpl& network, const DataPtr& inData) {
|
||||
InputInfo::Ptr info(new InputInfo());
|
||||
@ -152,7 +149,7 @@ CNNNetworkNGraphImpl::CNNNetworkNGraphImpl(
|
||||
if (output.second->getPrecision() == Precision::I64) {
|
||||
output.second->setPrecision(Precision::I32);
|
||||
} else if (output.second->getPrecision() != Precision::FP32 &&
|
||||
output.second->getPrecision() != Precision::I32) {
|
||||
output.second->getPrecision() != Precision::I32) {
|
||||
output.second->setPrecision(Precision::FP32);
|
||||
}
|
||||
}
|
||||
@ -225,18 +222,20 @@ void CNNNetworkNGraphImpl::validate(int version) {
|
||||
_ngraph_function->validate_nodes_and_infer_types();
|
||||
}
|
||||
|
||||
StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t outputIndex,
|
||||
StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName,
|
||||
size_t outputIndex,
|
||||
ResponseDesc* resp) noexcept {
|
||||
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::addOutput");
|
||||
|
||||
try {
|
||||
for (const auto & layer : _ngraph_function->get_ops()) {
|
||||
for (const auto& layer : _ngraph_function->get_ops()) {
|
||||
// Result can have the same name as previous operation
|
||||
if (layer->get_friendly_name() == layerName && !std::dynamic_pointer_cast<ngraph::op::Result>(layer)) {
|
||||
// Check that output port exists
|
||||
if (layer->outputs().size() <= outputIndex) {
|
||||
return DescriptionBuffer(OUT_OF_BOUNDS, resp)
|
||||
<< "port index " << outputIndex << " exceeds the number of layer outputs " << layer->outputs().size();
|
||||
<< "port index " << outputIndex << " exceeds the number of layer outputs "
|
||||
<< layer->outputs().size();
|
||||
}
|
||||
std::string outputName = layerName;
|
||||
if (layer->outputs().size() != 1) {
|
||||
@ -271,7 +270,7 @@ StatusCode CNNNetworkNGraphImpl::addOutput(const std::string& layerName, size_t
|
||||
return DescriptionBuffer(NOT_FOUND, resp) << "Cannot add output! Layer " << layerName << " wasn't found!";
|
||||
}
|
||||
|
||||
void CNNNetworkNGraphImpl::addOutput(const ::ngraph::Output<::ngraph::Node> & output) {
|
||||
void CNNNetworkNGraphImpl::addOutput(const ::ngraph::Output<::ngraph::Node>& output) {
|
||||
auto dataName = ngraph::op::util::create_ie_output_name(output);
|
||||
DataPtr data;
|
||||
if (_data.count(dataName))
|
||||
@ -313,16 +312,16 @@ void CNNNetworkNGraphImpl::reshape() {
|
||||
reshape({});
|
||||
}
|
||||
|
||||
StatusCode
|
||||
CNNNetworkNGraphImpl::reshape(const std::map<std::string, std::vector<size_t>>& inputShapes,
|
||||
ResponseDesc* responseDesc) noexcept {
|
||||
if (inputShapes.empty()) return OK;
|
||||
StatusCode CNNNetworkNGraphImpl::reshape(const std::map<std::string, std::vector<size_t>>& inputShapes,
|
||||
ResponseDesc* responseDesc) noexcept {
|
||||
if (inputShapes.empty())
|
||||
return OK;
|
||||
|
||||
const auto & params = _ngraph_function->get_parameters();
|
||||
const auto& params = _ngraph_function->get_parameters();
|
||||
|
||||
// Check that we need to do reshape only if input shapes will be changed
|
||||
bool needReshape = false;
|
||||
for (const auto & param : params) {
|
||||
for (const auto& param : params) {
|
||||
const auto it = inputShapes.find(param->get_friendly_name());
|
||||
if (it == inputShapes.end()) {
|
||||
continue;
|
||||
@ -333,11 +332,12 @@ CNNNetworkNGraphImpl::reshape(const std::map<std::string, std::vector<size_t>>&
|
||||
}
|
||||
}
|
||||
|
||||
if (!needReshape) return OK;
|
||||
if (!needReshape)
|
||||
return OK;
|
||||
|
||||
// save original parameters shape
|
||||
std::map<std::string, ngraph::PartialShape> originalInputShapes;
|
||||
for (const auto & param : params) {
|
||||
for (const auto& param : params) {
|
||||
originalInputShapes[param->get_friendly_name()] = param->get_partial_shape();
|
||||
}
|
||||
|
||||
@ -347,7 +347,7 @@ CNNNetworkNGraphImpl::reshape(const std::map<std::string, std::vector<size_t>>&
|
||||
ssr_manager.run_passes(_ngraph_function);
|
||||
|
||||
std::map<std::string, ngraph::PartialShape> reshapeShapes;
|
||||
for (const auto & item : inputShapes) {
|
||||
for (const auto& item : inputShapes) {
|
||||
reshapeShapes[item.first] = ngraph::PartialShape(item.second);
|
||||
}
|
||||
reshape(reshapeShapes);
|
||||
@ -359,8 +359,7 @@ CNNNetworkNGraphImpl::reshape(const std::map<std::string, std::vector<size_t>>&
|
||||
return OK;
|
||||
}
|
||||
|
||||
void
|
||||
CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialShape>& inputShapes) {
|
||||
void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialShape>& inputShapes) {
|
||||
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetworkNGraphImpl::reshape");
|
||||
|
||||
auto params = _ngraph_function->get_parameters();
|
||||
@ -377,9 +376,9 @@ CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialShape>&
|
||||
_ngraph_function->validate_nodes_and_infer_types();
|
||||
|
||||
const auto& results = _ngraph_function->get_results();
|
||||
bool outputs_are_static = all_of(
|
||||
begin(results), end(results),
|
||||
[](const std::shared_ptr<ngraph::Node>& n){ return n->get_output_partial_shape(0).is_static(); });
|
||||
bool outputs_are_static = all_of(begin(results), end(results), [](const std::shared_ptr<ngraph::Node>& n) {
|
||||
return n->get_output_partial_shape(0).is_static();
|
||||
});
|
||||
|
||||
{
|
||||
shared_ptr<Function> specialized_ngraph_function = nullptr;
|
||||
@ -398,7 +397,7 @@ CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialShape>&
|
||||
manager.register_pass<::ngraph::pass::ConstantFolding>();
|
||||
// OneHotToLegacy changes output precision
|
||||
manager.register_pass<::ngraph::pass::ConvertOneHotToOneHotIEMatcher>()->detect_output_type(
|
||||
specialized_ngraph_function);
|
||||
specialized_ngraph_function);
|
||||
manager.run_passes(specialized_ngraph_function);
|
||||
}
|
||||
specialized_ngraph_function->validate_nodes_and_infer_types();
|
||||
@ -443,12 +442,12 @@ CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialShape>&
|
||||
}
|
||||
#endif
|
||||
std::unordered_set<std::string> opName;
|
||||
for (const auto &result : specialized_ngraph_function->get_results()) {
|
||||
for (const auto& result : specialized_ngraph_function->get_results()) {
|
||||
addOutput(result->input_value(0));
|
||||
}
|
||||
|
||||
for (const auto ¶meter : specialized_ngraph_function->get_parameters()) {
|
||||
const auto &outName = parameter->get_friendly_name();
|
||||
for (const auto& parameter : specialized_ngraph_function->get_parameters()) {
|
||||
const auto& outName = parameter->get_friendly_name();
|
||||
if (opName.find(outName) != opName.end()) {
|
||||
IE_THROW() << "All operations in nGraph function should have unique friendly names!";
|
||||
}
|
||||
@ -468,9 +467,10 @@ StatusCode CNNNetworkNGraphImpl::serialize(const std::string& xmlPath,
|
||||
custom_opsets.insert(begin(opset), end(opset));
|
||||
}
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::Serialize>(
|
||||
xmlPath, binPath, ngraph::pass::Serialize::Version::IR_V10,
|
||||
custom_opsets);
|
||||
manager.register_pass<ngraph::pass::Serialize>(xmlPath,
|
||||
binPath,
|
||||
ngraph::pass::Serialize::Version::IR_V10,
|
||||
custom_opsets);
|
||||
manager.run_passes(_ngraph_function);
|
||||
} catch (const Exception& e) {
|
||||
return DescriptionBuffer(GENERAL_ERROR, resp) << e.what();
|
||||
@ -482,9 +482,8 @@ StatusCode CNNNetworkNGraphImpl::serialize(const std::string& xmlPath,
|
||||
return OK;
|
||||
}
|
||||
|
||||
StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf,
|
||||
std::ostream& binBuf,
|
||||
ResponseDesc* resp) const noexcept {
|
||||
StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, std::ostream& binBuf, ResponseDesc* resp) const
|
||||
noexcept {
|
||||
try {
|
||||
std::map<std::string, ngraph::OpSet> custom_opsets;
|
||||
for (const auto& extension : _ie_extensions) {
|
||||
@ -492,9 +491,10 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf,
|
||||
custom_opsets.insert(begin(opset), end(opset));
|
||||
}
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::Serialize>(
|
||||
xmlBuf, binBuf, ngraph::pass::Serialize::Version::IR_V10,
|
||||
custom_opsets);
|
||||
manager.register_pass<ngraph::pass::Serialize>(xmlBuf,
|
||||
binBuf,
|
||||
ngraph::pass::Serialize::Version::IR_V10,
|
||||
custom_opsets);
|
||||
manager.run_passes(_ngraph_function);
|
||||
} catch (const Exception& e) {
|
||||
return DescriptionBuffer(GENERAL_ERROR, resp) << e.what();
|
||||
@ -506,9 +506,8 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf,
|
||||
return OK;
|
||||
}
|
||||
|
||||
StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf,
|
||||
Blob::Ptr& binBlob,
|
||||
ResponseDesc* resp) const noexcept {
|
||||
StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf, Blob::Ptr& binBlob, ResponseDesc* resp) const
|
||||
noexcept {
|
||||
try {
|
||||
std::map<std::string, ngraph::OpSet> custom_opsets;
|
||||
for (const auto& extension : _ie_extensions) {
|
||||
@ -518,15 +517,16 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf,
|
||||
|
||||
std::stringstream binBuf;
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::Serialize>(
|
||||
xmlBuf, binBuf, ngraph::pass::Serialize::Version::IR_V10,
|
||||
custom_opsets);
|
||||
manager.register_pass<ngraph::pass::Serialize>(xmlBuf,
|
||||
binBuf,
|
||||
ngraph::pass::Serialize::Version::IR_V10,
|
||||
custom_opsets);
|
||||
manager.run_passes(_ngraph_function);
|
||||
|
||||
std::streambuf* pbuf = binBuf.rdbuf();
|
||||
unsigned long bufSize = binBuf.tellp();
|
||||
|
||||
TensorDesc tensorDesc(Precision::U8, { bufSize }, Layout::C);
|
||||
TensorDesc tensorDesc(Precision::U8, {bufSize}, Layout::C);
|
||||
binBlob = make_shared_blob<uint8_t>(tensorDesc);
|
||||
binBlob->allocate();
|
||||
pbuf->sgetn(binBlob->buffer(), bufSize);
|
||||
@ -540,46 +540,66 @@ StatusCode CNNNetworkNGraphImpl::serialize(std::ostream& xmlBuf,
|
||||
return OK;
|
||||
}
|
||||
|
||||
StatusCode CNNNetworkNGraphImpl::getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept {
|
||||
StatusCode CNNNetworkNGraphImpl::getOVNameForTensor(std::string& ov_name,
|
||||
const std::string& orig_name,
|
||||
ResponseDesc* resp) const noexcept {
|
||||
if (_tensorNames.find(orig_name) == _tensorNames.end())
|
||||
return DescriptionBuffer(NOT_FOUND, resp) << "Framework tensor with name \"" << orig_name << "\" was not mapped to OpenVINO data!";
|
||||
return DescriptionBuffer(NOT_FOUND, resp)
|
||||
<< "Framework tensor with name \"" << orig_name << "\" was not mapped to OpenVINO data!";
|
||||
ov_name = _tensorNames.at(orig_name);
|
||||
return OK;
|
||||
}
|
||||
|
||||
StatusCode CNNNetworkNGraphImpl::setBatchSize(size_t size, ResponseDesc* responseDesc) noexcept {
|
||||
try {
|
||||
if (getBatchSize() == size) return OK;
|
||||
if (getBatchSize() == size)
|
||||
return OK;
|
||||
auto original_parameters = _ngraph_function->get_parameters();
|
||||
if (original_parameters.empty()) return DescriptionBuffer(GENERAL_ERROR, responseDesc) << "Cannot set batch! Function doesn't contain parameters!";
|
||||
if (original_parameters.empty())
|
||||
return DescriptionBuffer(GENERAL_ERROR, responseDesc)
|
||||
<< "Cannot set batch! Function doesn't contain parameters!";
|
||||
|
||||
stringstream ss;
|
||||
ss << " Please use reshape method instead. Original parameter shapes are: ";
|
||||
for (size_t i = 0; i < original_parameters.size(); ++i) {
|
||||
if (i) ss << ", ";
|
||||
ss << "\"" << original_parameters[i]->get_friendly_name() << "\": " << original_parameters[i]->get_partial_shape();
|
||||
if (i)
|
||||
ss << ", ";
|
||||
ss << "\"" << original_parameters[i]->get_friendly_name()
|
||||
<< "\": " << original_parameters[i]->get_partial_shape();
|
||||
}
|
||||
|
||||
// ill-formed logic from the past setBatchSize (we keep it for backward-compatibility)
|
||||
const auto first_parameter = *std::min_element(original_parameters.begin(), original_parameters.end(),
|
||||
[](std::shared_ptr<ngraph::Node> lhs, std::shared_ptr<ngraph::Node> rhs){return lhs->get_friendly_name() < rhs->get_friendly_name();});
|
||||
const auto first_parameter =
|
||||
*std::min_element(original_parameters.begin(),
|
||||
original_parameters.end(),
|
||||
[](std::shared_ptr<ngraph::Node> lhs, std::shared_ptr<ngraph::Node> rhs) {
|
||||
return lhs->get_friendly_name() < rhs->get_friendly_name();
|
||||
});
|
||||
const auto first_parameter_pshape = first_parameter->get_partial_shape();
|
||||
if (first_parameter_pshape.is_dynamic()) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) <<
|
||||
"Cannot set batch! Function contains parameter with partially defined shape!" << ss.str();
|
||||
if (first_parameter_pshape.is_dynamic())
|
||||
return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc)
|
||||
<< "Cannot set batch! Function contains parameter with partially defined shape!" << ss.str();
|
||||
const auto first_parameter_rank = first_parameter_pshape.rank().get_length();
|
||||
if (first_parameter_rank == 0 || first_parameter_rank == 1 || first_parameter_rank == 3) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) <<
|
||||
"Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension placement." << ss.str();
|
||||
if (first_parameter_rank == 0 || first_parameter_rank == 1 || first_parameter_rank == 3)
|
||||
return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc)
|
||||
<< "Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension placement."
|
||||
<< ss.str();
|
||||
|
||||
std::map<std::string, std::vector<size_t>> inShapes;
|
||||
for (const auto ¶meter : original_parameters) {
|
||||
const auto & pshape = parameter->get_partial_shape();
|
||||
if (pshape.is_dynamic()) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) <<
|
||||
"Cannot set batch! Function contains parameter with partially defined shape!" << ss.str();
|
||||
const auto & rank = pshape.rank().get_length();
|
||||
if (rank == 0) return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc) <<
|
||||
"Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension placement." << ss.str();
|
||||
for (const auto& parameter : original_parameters) {
|
||||
const auto& pshape = parameter->get_partial_shape();
|
||||
if (pshape.is_dynamic())
|
||||
return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc)
|
||||
<< "Cannot set batch! Function contains parameter with partially defined shape!" << ss.str();
|
||||
const auto& rank = pshape.rank().get_length();
|
||||
if (rank == 0)
|
||||
return DescriptionBuffer(PARAMETER_MISMATCH, responseDesc)
|
||||
<< "Cannot set batch! Function contains 0D/1D/3D parameter with unknown batch dimension "
|
||||
"placement."
|
||||
<< ss.str();
|
||||
auto shape = parameter->get_shape();
|
||||
shape[0] = {static_cast<size_t>(std::ceil(size * static_cast<float>(shape[0]) / static_cast<float>(getBatchSize())))};
|
||||
shape[0] = {static_cast<size_t>(
|
||||
std::ceil(size * static_cast<float>(shape[0]) / static_cast<float>(getBatchSize())))};
|
||||
inShapes[parameter->get_friendly_name()] = shape;
|
||||
}
|
||||
ngraph::pass::Manager ssr_manager;
|
||||
|
@ -11,24 +11,23 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/attribute_visitor.hpp>
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/node.hpp>
|
||||
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "description_buffer.hpp"
|
||||
#include "ie_api.h"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_common.h"
|
||||
#include "ie_data.h"
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ie_extension.h"
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
#include "ngraph/function.hpp"
|
||||
#include "ngraph/node.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
@ -62,7 +61,7 @@ public:
|
||||
|
||||
StatusCode addOutput(const std::string& layerName, size_t outputIndex, ResponseDesc* resp) noexcept override;
|
||||
|
||||
void addOutput(const ::ngraph::Output<::ngraph::Node> & dataName);
|
||||
void addOutput(const ::ngraph::Output<::ngraph::Node>& dataName);
|
||||
|
||||
std::shared_ptr<const ::ngraph::Function> getFunction() const noexcept override {
|
||||
return _ngraph_function;
|
||||
@ -79,16 +78,16 @@ public:
|
||||
StatusCode serialize(const std::string& xmlPath, const std::string& binPath, ResponseDesc* resp) const
|
||||
noexcept override;
|
||||
|
||||
StatusCode serialize(std::ostream& xmlBuf, std::ostream& binBuf, ResponseDesc* resp) const
|
||||
noexcept override;
|
||||
StatusCode serialize(std::ostream& xmlBuf, std::ostream& binBuf, ResponseDesc* resp) const noexcept override;
|
||||
|
||||
StatusCode serialize(std::ostream& xmlBuf, Blob::Ptr& binBlob, ResponseDesc* resp) const
|
||||
noexcept override;
|
||||
StatusCode serialize(std::ostream& xmlBuf, Blob::Ptr& binBlob, ResponseDesc* resp) const noexcept override;
|
||||
|
||||
StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const noexcept override;
|
||||
StatusCode getOVNameForTensor(std::string& ov_name, const std::string& orig_name, ResponseDesc* resp) const
|
||||
noexcept override;
|
||||
|
||||
// used by convertFunctionToICNNNetwork from legacy library
|
||||
std::map<std::string, DataPtr> _data;
|
||||
|
||||
protected:
|
||||
std::shared_ptr<::ngraph::Function> _ngraph_function;
|
||||
|
||||
|
@ -4,28 +4,27 @@
|
||||
|
||||
#include "compilation_context.hpp"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifndef WIN32
|
||||
#include <unistd.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
#include <xml_parse_utils.h>
|
||||
|
||||
#include "ie_itt.hpp"
|
||||
#include "transformations/serialize.hpp"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "details/ie_exception.hpp"
|
||||
|
||||
#include "ngraph/variant.hpp"
|
||||
#include "file_utils.h"
|
||||
#include "ie_itt.hpp"
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
#include "ngraph/variant.hpp"
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
#include "transformations/rt_info/fused_names_attribute.hpp"
|
||||
#include "transformations/rt_info/primitives_priority_attribute.hpp"
|
||||
#include "file_utils.h"
|
||||
#include "transformations/serialize.hpp"
|
||||
|
||||
#ifdef WIN32
|
||||
#define stat _stat
|
||||
# define stat _stat
|
||||
#endif
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -41,12 +40,15 @@ static int32_t as_int32_t(T v) {
|
||||
return static_cast<int32_t>(v);
|
||||
}
|
||||
|
||||
class OstreamHashWrapper final: public std::streambuf {
|
||||
class OstreamHashWrapper final : public std::streambuf {
|
||||
std::size_t m_res = 0;
|
||||
|
||||
public:
|
||||
std::size_t getResult() const { return m_res; }
|
||||
std::size_t getResult() const {
|
||||
return m_res;
|
||||
}
|
||||
std::streamsize xsputn(const char* s, std::streamsize n) override {
|
||||
const std::int64_t* intS = (const std::int64_t *)s;
|
||||
const std::int64_t* intS = (const std::int64_t*)s;
|
||||
std::streamsize n64 = n / sizeof(std::int64_t);
|
||||
std::streamsize i = 0;
|
||||
// Using 64-bit values executes much faster than char
|
||||
@ -85,7 +87,7 @@ std::string NetworkCompilationContext::calculateFileInfo(const std::string& file
|
||||
}
|
||||
|
||||
std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& compileOptions) {
|
||||
const std::map<std::string, std::string>& compileOptions) {
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - CNN");
|
||||
OstreamHashWrapper xmlHash;
|
||||
OstreamHashWrapper binHash;
|
||||
@ -96,8 +98,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
|
||||
// 1. Serialize
|
||||
CNNNetwork net(network);
|
||||
ngraph::pass::Serialize serializer(xml, bin,
|
||||
ngraph::pass::Serialize::Version::IR_V10);
|
||||
ngraph::pass::Serialize serializer(xml, bin, ngraph::pass::Serialize::Version::IR_V10);
|
||||
serializer.run_on_function(net.getFunction());
|
||||
|
||||
// 2. Compute hash on serialized data and options
|
||||
@ -117,13 +118,17 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
|
||||
if (auto stringData = std::dynamic_pointer_cast<ngraph::VariantWrapper<std::string>>(rtMapData.second)) {
|
||||
seed = hash_combine(seed, stringData->get());
|
||||
} else if (auto intData = std::dynamic_pointer_cast<ngraph::VariantWrapper<std::int64_t>>(rtMapData.second)) {
|
||||
} else if (auto intData =
|
||||
std::dynamic_pointer_cast<ngraph::VariantWrapper<std::int64_t>>(rtMapData.second)) {
|
||||
seed = hash_combine(seed, intData->get());
|
||||
} else if (auto deq = std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::DequantizationAttr>>(rtMapData.second)) {
|
||||
} else if (auto deq = std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::DequantizationAttr>>(
|
||||
rtMapData.second)) {
|
||||
seed = hash_combine(seed, deq->get().getDequantizationAttr());
|
||||
} else if (auto fNames = std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::FusedNames>>(rtMapData.second)) {
|
||||
} else if (auto fNames =
|
||||
std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::FusedNames>>(rtMapData.second)) {
|
||||
seed = hash_combine(seed, fNames->get().getNames());
|
||||
} else if (auto prim = std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::PrimitivesPriority>>(rtMapData.second)) {
|
||||
} else if (auto prim = std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::PrimitivesPriority>>(
|
||||
rtMapData.second)) {
|
||||
seed = hash_combine(seed, prim->get().getPrimitivesPriority());
|
||||
}
|
||||
}
|
||||
@ -141,7 +146,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
if (preproc.getMeanVariant() == MeanVariant::MEAN_VALUE) {
|
||||
seed = hash_combine(seed, preproc.getNumberOfChannels());
|
||||
for (size_t c = 0; c < preproc.getNumberOfChannels(); ++c) {
|
||||
const PreProcessChannel::Ptr & channelInfo = preproc[c];
|
||||
const PreProcessChannel::Ptr& channelInfo = preproc[c];
|
||||
seed = hash_combine(seed, channelInfo->stdScale);
|
||||
seed = hash_combine(seed, channelInfo->meanValue);
|
||||
}
|
||||
@ -161,7 +166,7 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
}
|
||||
|
||||
std::string NetworkCompilationContext::computeHash(const std::string& modelName,
|
||||
const std::map<std::string, std::string>& compileOptions) {
|
||||
const std::map<std::string, std::string>& compileOptions) {
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::IE_LT, "NetworkCompilationContext::computeHash - ModelName");
|
||||
size_t seed = 0;
|
||||
try {
|
||||
@ -180,12 +185,11 @@ std::string NetworkCompilationContext::computeHash(const std::string& modelName,
|
||||
|
||||
CompiledBlobHeader::CompiledBlobHeader() {}
|
||||
|
||||
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo) :
|
||||
m_ieVersion(ieVersion),
|
||||
m_fileInfo(fileInfo) {
|
||||
}
|
||||
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo)
|
||||
: m_ieVersion(ieVersion),
|
||||
m_fileInfo(fileInfo) {}
|
||||
|
||||
std::istream& operator >> (std::istream& stream, CompiledBlobHeader& header) {
|
||||
std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
|
||||
std::string xmlStr;
|
||||
std::getline(stream, xmlStr);
|
||||
|
||||
@ -203,7 +207,7 @@ std::istream& operator >> (std::istream& stream, CompiledBlobHeader& header) {
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream& operator << (std::ostream& stream, const CompiledBlobHeader& header) {
|
||||
std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header) {
|
||||
pugi::xml_document document;
|
||||
auto compiledBlobNode = document.append_child("compiled_blob");
|
||||
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str());
|
||||
|
@ -4,10 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <istream>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -16,8 +16,7 @@ class CNNNetwork;
|
||||
struct NetworkCompilationContext final {
|
||||
static std::string calculateFileInfo(const std::string& filePath);
|
||||
|
||||
static std::string computeHash(const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& compileOptions);
|
||||
static std::string computeHash(const CNNNetwork& network, const std::map<std::string, std::string>& compileOptions);
|
||||
|
||||
static std::string computeHash(const std::string& modelName,
|
||||
const std::map<std::string, std::string>& compileOptions);
|
||||
@ -39,9 +38,9 @@ public:
|
||||
return m_fileInfo;
|
||||
}
|
||||
|
||||
friend std::istream & operator >> (std::istream& stream, CompiledBlobHeader& header);
|
||||
friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);
|
||||
|
||||
friend std::ostream & operator << (std::ostream& stream, const CompiledBlobHeader& header);
|
||||
friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);
|
||||
};
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -3,27 +3,24 @@
|
||||
//
|
||||
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "exception2status.hpp"
|
||||
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
#include "exception2status.hpp"
|
||||
#include "ie_itt.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
CNNNetwork::CNNNetwork() :
|
||||
network(), actual() {
|
||||
}
|
||||
CNNNetwork::CNNNetwork() : network(), actual() {}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
CNNNetwork::CNNNetwork(std::shared_ptr<ICNNNetwork> network)
|
||||
: network(network) {
|
||||
CNNNetwork::CNNNetwork(std::shared_ptr<ICNNNetwork> network) : network(network) {
|
||||
actual = network.get();
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
}
|
||||
|
||||
CNNNetwork::CNNNetwork(const std::shared_ptr<ngraph::Function>& graph,
|
||||
const std::vector<IExtensionPtr>& exts) {
|
||||
CNNNetwork::CNNNetwork(const std::shared_ptr<ngraph::Function>& graph, const std::vector<IExtensionPtr>& exts) {
|
||||
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CNNNetwork::CNNNetwork");
|
||||
|
||||
if (graph == nullptr) {
|
||||
@ -39,26 +36,30 @@ CNNNetwork::CNNNetwork(const std::shared_ptr<ngraph::Function>& graph,
|
||||
}
|
||||
|
||||
OutputsDataMap CNNNetwork::getOutputsInfo() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
OutputsDataMap outputs;
|
||||
actual->getOutputsInfo(outputs);
|
||||
return outputs;
|
||||
}
|
||||
|
||||
InputsDataMap CNNNetwork::getInputsInfo() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
InputsDataMap inputs;
|
||||
actual->getInputsInfo(inputs);
|
||||
return inputs;
|
||||
}
|
||||
|
||||
size_t CNNNetwork::layerCount() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return actual->layerCount();
|
||||
}
|
||||
|
||||
const std::string& CNNNetwork::getName() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return actual->getName();
|
||||
}
|
||||
|
||||
@ -67,7 +68,8 @@ void CNNNetwork::setBatchSize(const size_t size) {
|
||||
}
|
||||
|
||||
size_t CNNNetwork::getBatchSize() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return actual->getBatchSize();
|
||||
}
|
||||
|
||||
@ -76,22 +78,26 @@ CNNNetwork::operator ICNNNetwork::Ptr() {
|
||||
}
|
||||
|
||||
CNNNetwork::operator ICNNNetwork&() {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return *actual;
|
||||
}
|
||||
|
||||
CNNNetwork::operator const ICNNNetwork&() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return *actual;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Function> CNNNetwork::getFunction() {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return actual->getFunction();
|
||||
}
|
||||
|
||||
std::shared_ptr<const ngraph::Function> CNNNetwork::getFunction() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
return actual->getFunction();
|
||||
}
|
||||
|
||||
@ -100,7 +106,8 @@ void CNNNetwork::addOutput(const std::string& layerName, size_t outputIndex) {
|
||||
}
|
||||
|
||||
ICNNNetwork::InputShapes CNNNetwork::getInputShapes() const {
|
||||
if (actual == nullptr) IE_THROW() << "CNNNetwork was not initialized.";
|
||||
if (actual == nullptr)
|
||||
IE_THROW() << "CNNNetwork was not initialized.";
|
||||
ICNNNetwork::InputShapes shapes;
|
||||
InputsDataMap inputs;
|
||||
actual->getInputsInfo(inputs);
|
||||
|
@ -2,24 +2,27 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ie_common.h"
|
||||
|
||||
#include "cpp/ie_executable_network.hpp"
|
||||
|
||||
#include "cpp/exception2status.hpp"
|
||||
#include "ie_executable_network_base.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
#include "ie_common.h"
|
||||
#include "ie_executable_network_base.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
#define EXEC_NET_CALL_STATEMENT(...) \
|
||||
if (_impl == nullptr) IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized."; \
|
||||
try { \
|
||||
__VA_ARGS__; \
|
||||
} catch(...) {details::Rethrow();}
|
||||
#define EXEC_NET_CALL_STATEMENT(...) \
|
||||
if (_impl == nullptr) \
|
||||
IE_THROW(NotAllocated) << "ExecutableNetwork was not initialized."; \
|
||||
try { \
|
||||
__VA_ARGS__; \
|
||||
} catch (...) { \
|
||||
details::Rethrow(); \
|
||||
}
|
||||
|
||||
ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so,
|
||||
const IExecutableNetworkInternal::Ptr& impl)
|
||||
: _so(so), _impl(impl) {
|
||||
ExecutableNetwork::ExecutableNetwork(const details::SharedObjectLoader& so, const IExecutableNetworkInternal::Ptr& impl)
|
||||
: _so(so),
|
||||
_impl(impl) {
|
||||
IE_ASSERT(_impl != nullptr);
|
||||
}
|
||||
|
||||
@ -34,8 +37,10 @@ ConstInputsDataMap ExecutableNetwork::GetInputsInfo() const {
|
||||
}
|
||||
|
||||
void ExecutableNetwork::reset(IExecutableNetwork::Ptr newActual) {
|
||||
if (_impl == nullptr) IE_THROW() << "ExecutableNetwork was not initialized.";
|
||||
if (newActual == nullptr) IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized.";
|
||||
if (_impl == nullptr)
|
||||
IE_THROW() << "ExecutableNetwork was not initialized.";
|
||||
if (newActual == nullptr)
|
||||
IE_THROW() << "ExecutableNetwork wrapper used for reset was not initialized.";
|
||||
auto newBase = std::dynamic_pointer_cast<ExecutableNetworkBase>(newActual);
|
||||
IE_ASSERT(newBase != nullptr);
|
||||
auto newImpl = newBase->GetImpl();
|
||||
@ -49,10 +54,10 @@ ExecutableNetwork::operator IExecutableNetwork::Ptr() {
|
||||
|
||||
std::vector<VariableState> ExecutableNetwork::QueryState() {
|
||||
std::vector<VariableState> controller;
|
||||
EXEC_NET_CALL_STATEMENT(
|
||||
for (auto&& state : _impl->QueryState()) {
|
||||
controller.emplace_back(VariableState{ _so, state });
|
||||
});
|
||||
EXEC_NET_CALL_STATEMENT(for (auto&& state
|
||||
: _impl->QueryState()) {
|
||||
controller.emplace_back(VariableState{_so, state});
|
||||
});
|
||||
return controller;
|
||||
}
|
||||
|
||||
|
@ -14,19 +14,20 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <ie_iexecutable_network.hpp>
|
||||
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
||||
#include "cpp/exception2status.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
|
||||
#include "ie_iexecutable_network.hpp"
|
||||
#include "ie_infer_async_request_base.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
/**
|
||||
* @brief Executable network `noexcept` wrapper which accepts IExecutableNetworkInternal derived instance which can throw exceptions
|
||||
* @brief Executable network `noexcept` wrapper which accepts IExecutableNetworkInternal derived instance which can
|
||||
* throw exceptions
|
||||
* @ingroup ie_dev_api_exec_network_api
|
||||
*/
|
||||
*/
|
||||
class ExecutableNetworkBase : public IExecutableNetwork {
|
||||
protected:
|
||||
std::shared_ptr<IExecutableNetworkInternal> _impl;
|
||||
|
@ -2,28 +2,31 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "cpp/ie_infer_request.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ie_remote_context.hpp"
|
||||
|
||||
#include "cpp/ie_infer_request.hpp"
|
||||
#include "cpp/exception2status.hpp"
|
||||
#include "ie_infer_async_request_base.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
|
||||
#include "ie_infer_async_request_base.hpp"
|
||||
#include "ie_remote_context.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
#define INFER_REQ_CALL_STATEMENT(...) \
|
||||
if (_impl == nullptr) IE_THROW(NotAllocated) << "Inference Request is not initialized"; \
|
||||
try { \
|
||||
__VA_ARGS__ \
|
||||
} catch(...) {details::Rethrow();}
|
||||
#define INFER_REQ_CALL_STATEMENT(...) \
|
||||
if (_impl == nullptr) \
|
||||
IE_THROW(NotAllocated) << "Inference Request is not initialized"; \
|
||||
try { \
|
||||
__VA_ARGS__ \
|
||||
} catch (...) { \
|
||||
details::Rethrow(); \
|
||||
}
|
||||
|
||||
InferRequest::InferRequest(const details::SharedObjectLoader& so,
|
||||
const IInferRequestInternal::Ptr& impl)
|
||||
: _so(so), _impl(impl) {
|
||||
InferRequest::InferRequest(const details::SharedObjectLoader& so, const IInferRequestInternal::Ptr& impl)
|
||||
: _so(so),
|
||||
_impl(impl) {
|
||||
IE_ASSERT(_impl != nullptr);
|
||||
}
|
||||
|
||||
@ -38,12 +41,14 @@ Blob::Ptr InferRequest::GetBlob(const std::string& name) {
|
||||
INFER_REQ_CALL_STATEMENT(blobPtr = _impl->GetBlob(name);)
|
||||
std::string error = "Internal error: blob with name `" + name + "` is not allocated!";
|
||||
const bool remoteBlobPassed = blobPtr->is<RemoteBlob>();
|
||||
if (blobPtr == nullptr) IE_THROW() << error;
|
||||
if (!remoteBlobPassed && blobPtr->buffer() == nullptr) IE_THROW() << error;
|
||||
if (blobPtr == nullptr)
|
||||
IE_THROW() << error;
|
||||
if (!remoteBlobPassed && blobPtr->buffer() == nullptr)
|
||||
IE_THROW() << error;
|
||||
return blobPtr;
|
||||
}
|
||||
|
||||
void InferRequest::SetBlob(const std::string &name, const Blob::Ptr &data, const PreProcessInfo& info) {
|
||||
void InferRequest::SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info) {
|
||||
INFER_REQ_CALL_STATEMENT(_impl->SetBlob(name, data, info);)
|
||||
}
|
||||
|
||||
@ -64,19 +69,11 @@ std::map<std::string, InferenceEngineProfileInfo> InferRequest::GetPerformanceCo
|
||||
}
|
||||
|
||||
void InferRequest::SetInput(const BlobMap& inputs) {
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
for (auto&& input : inputs) {
|
||||
_impl->SetBlob(input.first, input.second);
|
||||
}
|
||||
)
|
||||
INFER_REQ_CALL_STATEMENT(for (auto&& input : inputs) { _impl->SetBlob(input.first, input.second); })
|
||||
}
|
||||
|
||||
void InferRequest::SetOutput(const BlobMap& results) {
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
for (auto&& result : results) {
|
||||
_impl->SetBlob(result.first, result.second);
|
||||
}
|
||||
)
|
||||
INFER_REQ_CALL_STATEMENT(for (auto&& result : results) { _impl->SetBlob(result.first, result.second); })
|
||||
}
|
||||
|
||||
void InferRequest::SetBatch(const int batch) {
|
||||
@ -87,92 +84,92 @@ void InferRequest::StartAsync() {
|
||||
INFER_REQ_CALL_STATEMENT(_impl->StartAsync();)
|
||||
}
|
||||
|
||||
|
||||
StatusCode InferRequest::Wait(int64_t millis_timeout) {
|
||||
INFER_REQ_CALL_STATEMENT(return _impl->Wait(millis_timeout);)
|
||||
}
|
||||
|
||||
void InferRequest::SetCompletionCallbackImpl(std::function<void()> callbackToSet) {
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
_impl->SetCallback([callbackToSet] (std::exception_ptr) {
|
||||
callbackToSet();
|
||||
});
|
||||
)
|
||||
INFER_REQ_CALL_STATEMENT(_impl->SetCallback([callbackToSet](std::exception_ptr) {
|
||||
callbackToSet();
|
||||
});)
|
||||
}
|
||||
|
||||
#define CATCH_IE_EXCEPTION_RETURN(StatusCode, ExceptionType) catch (const ExceptionType&) {return StatusCode;}
|
||||
|
||||
#define CATCH_IE_EXCEPTIONS_RETURN \
|
||||
CATCH_IE_EXCEPTION_RETURN(GENERAL_ERROR, GeneralError) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NOT_IMPLEMENTED, NotImplemented) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_LOADED, NetworkNotLoaded) \
|
||||
CATCH_IE_EXCEPTION_RETURN(PARAMETER_MISMATCH, ParameterMismatch) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NOT_FOUND, NotFound) \
|
||||
CATCH_IE_EXCEPTION_RETURN(OUT_OF_BOUNDS, OutOfBounds) \
|
||||
CATCH_IE_EXCEPTION_RETURN(UNEXPECTED, Unexpected) \
|
||||
CATCH_IE_EXCEPTION_RETURN(REQUEST_BUSY, RequestBusy) \
|
||||
CATCH_IE_EXCEPTION_RETURN(RESULT_NOT_READY, ResultNotReady) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NOT_ALLOCATED, NotAllocated) \
|
||||
CATCH_IE_EXCEPTION_RETURN(INFER_NOT_STARTED, InferNotStarted) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_READ, NetworkNotRead) \
|
||||
CATCH_IE_EXCEPTION_RETURN(INFER_CANCELLED, InferCancelled)
|
||||
#define CATCH_IE_EXCEPTION_RETURN(StatusCode, ExceptionType) \
|
||||
catch (const ExceptionType&) { \
|
||||
return StatusCode; \
|
||||
}
|
||||
|
||||
#define CATCH_IE_EXCEPTIONS_RETURN \
|
||||
CATCH_IE_EXCEPTION_RETURN(GENERAL_ERROR, GeneralError) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NOT_IMPLEMENTED, NotImplemented) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_LOADED, NetworkNotLoaded) \
|
||||
CATCH_IE_EXCEPTION_RETURN(PARAMETER_MISMATCH, ParameterMismatch) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NOT_FOUND, NotFound) \
|
||||
CATCH_IE_EXCEPTION_RETURN(OUT_OF_BOUNDS, OutOfBounds) \
|
||||
CATCH_IE_EXCEPTION_RETURN(UNEXPECTED, Unexpected) \
|
||||
CATCH_IE_EXCEPTION_RETURN(REQUEST_BUSY, RequestBusy) \
|
||||
CATCH_IE_EXCEPTION_RETURN(RESULT_NOT_READY, ResultNotReady) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NOT_ALLOCATED, NotAllocated) \
|
||||
CATCH_IE_EXCEPTION_RETURN(INFER_NOT_STARTED, InferNotStarted) \
|
||||
CATCH_IE_EXCEPTION_RETURN(NETWORK_NOT_READ, NetworkNotRead) \
|
||||
CATCH_IE_EXCEPTION_RETURN(INFER_CANCELLED, InferCancelled)
|
||||
|
||||
void InferRequest::SetCompletionCallbackImpl(std::function<void(InferRequest, StatusCode)> callbackToSet) {
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
auto weakThis = InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*){}}};
|
||||
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
|
||||
auto weakThis =
|
||||
InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*) {}}};
|
||||
_impl->SetCallback([callbackToSet, weakThis](std::exception_ptr exceptionPtr) {
|
||||
StatusCode statusCode = StatusCode::OK;
|
||||
if (exceptionPtr != nullptr) {
|
||||
statusCode = [&] {
|
||||
try {
|
||||
std::rethrow_exception(exceptionPtr);
|
||||
} CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) {
|
||||
}
|
||||
CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) {
|
||||
return GENERAL_ERROR;
|
||||
} catch (...) {
|
||||
}
|
||||
catch (...) {
|
||||
return UNEXPECTED;
|
||||
}
|
||||
} ();
|
||||
}();
|
||||
}
|
||||
callbackToSet(weakThis, statusCode);
|
||||
});
|
||||
)
|
||||
});)
|
||||
}
|
||||
|
||||
void InferRequest::SetCompletionCallbackImpl(IInferRequest::CompletionCallback callbackToSet) {
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
IInferRequest::Ptr weakThis = InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*){}}};
|
||||
_impl->SetCallback([callbackToSet, weakThis] (std::exception_ptr exceptionPtr) {
|
||||
IInferRequest::Ptr weakThis =
|
||||
InferRequest{_so, std::shared_ptr<IInferRequestInternal>{_impl.get(), [](IInferRequestInternal*) {}}};
|
||||
_impl->SetCallback([callbackToSet, weakThis](std::exception_ptr exceptionPtr) {
|
||||
StatusCode statusCode = StatusCode::OK;
|
||||
if (exceptionPtr != nullptr) {
|
||||
statusCode = [&] {
|
||||
try {
|
||||
std::rethrow_exception(exceptionPtr);
|
||||
} CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) {
|
||||
}
|
||||
CATCH_IE_EXCEPTIONS_RETURN catch (const std::exception&) {
|
||||
return GENERAL_ERROR;
|
||||
} catch (...) {
|
||||
}
|
||||
catch (...) {
|
||||
return UNEXPECTED;
|
||||
}
|
||||
} ();
|
||||
}();
|
||||
}
|
||||
callbackToSet(weakThis, statusCode);
|
||||
});
|
||||
)
|
||||
});)
|
||||
}
|
||||
|
||||
InferRequest::operator IInferRequest::Ptr () {
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
return std::make_shared<InferRequestBase>(_impl);
|
||||
)
|
||||
InferRequest::operator IInferRequest::Ptr() {
|
||||
INFER_REQ_CALL_STATEMENT(return std::make_shared<InferRequestBase>(_impl);)
|
||||
}
|
||||
|
||||
std::vector<VariableState> InferRequest::QueryState() {
|
||||
std::vector<VariableState> controller;
|
||||
INFER_REQ_CALL_STATEMENT(
|
||||
for (auto&& state : _impl->QueryState()) {
|
||||
controller.emplace_back(VariableState{_so, state});
|
||||
}
|
||||
)
|
||||
INFER_REQ_CALL_STATEMENT(for (auto&& state
|
||||
: _impl->QueryState()) {
|
||||
controller.emplace_back(VariableState{_so, state});
|
||||
})
|
||||
return controller;
|
||||
}
|
||||
|
||||
|
@ -2,23 +2,27 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "cpp/ie_memory_state.hpp"
|
||||
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "exception2status.hpp"
|
||||
|
||||
#define VARIABLE_CALL_STATEMENT(...) \
|
||||
if (_impl == nullptr) IE_THROW(NotAllocated) << "VariableState was not initialized."; \
|
||||
try { \
|
||||
__VA_ARGS__; \
|
||||
} catch(...) {details::Rethrow();}
|
||||
#define VARIABLE_CALL_STATEMENT(...) \
|
||||
if (_impl == nullptr) \
|
||||
IE_THROW(NotAllocated) << "VariableState was not initialized."; \
|
||||
try { \
|
||||
__VA_ARGS__; \
|
||||
} catch (...) { \
|
||||
details::Rethrow(); \
|
||||
}
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
VariableState::VariableState(const details::SharedObjectLoader& so,
|
||||
const IVariableStateInternal::Ptr& impl)
|
||||
: _so(so), _impl(impl) {
|
||||
if (_impl == nullptr) IE_THROW() << "VariableState was not initialized.";
|
||||
VariableState::VariableState(const details::SharedObjectLoader& so, const IVariableStateInternal::Ptr& impl)
|
||||
: _so(so),
|
||||
_impl(impl) {
|
||||
if (_impl == nullptr)
|
||||
IE_THROW() << "VariableState was not initialized.";
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
@ -2,19 +2,19 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
#include <ie_icore.hpp>
|
||||
#include <ie_parameter.hpp>
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
|
||||
#include "ie_icore.hpp"
|
||||
#include "ie_parameter.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -90,8 +90,9 @@ std::shared_ptr<RemoteContext> IExecutableNetworkInternal::GetContext() const {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
std::shared_ptr<IInferRequestInternal> IExecutableNetworkInternal::CreateInferRequestImpl(InputsDataMap networkInputs,
|
||||
OutputsDataMap networkOutputs) {
|
||||
std::shared_ptr<IInferRequestInternal> IExecutableNetworkInternal::CreateInferRequestImpl(
|
||||
InputsDataMap networkInputs,
|
||||
OutputsDataMap networkOutputs) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
|
@ -2,31 +2,30 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"

#include <map>
#include <memory>
#include <string>

#include <ie_blob.h>
#include <ie_common.h>
#include <ie_preprocess.hpp>
#include <ie_compound_blob.h>
#include <ie_algorithm.hpp>
#include <ie_remote_context.hpp>
#include <debug.h>
#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include <cpp_interfaces/plugin_itt.hpp>

#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
#include "cpp_interfaces/plugin_itt.hpp"
#include "debug.h"
#include "ie_algorithm.hpp"
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_compound_blob.h"
#include "ie_preprocess.hpp"
#include "ie_remote_context.hpp"

namespace InferenceEngine {

IInferRequestInternal::~IInferRequestInternal() {}

IInferRequestInternal::IInferRequestInternal(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs) :
// We should copy maps since they can be overriden in SetBlob with preprocess
_networkInputs{copyInfo(networkInputs)},
_networkOutputs{copyInfo(networkOutputs)} {
}
IInferRequestInternal::IInferRequestInternal(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs)
: // We should copy maps since they can be overriden in SetBlob with preprocess
_networkInputs{copyInfo(networkInputs)},
_networkOutputs{copyInfo(networkOutputs)} {}

void IInferRequestInternal::Infer() {
checkBlobs();
@ -50,9 +49,10 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us
|
||||
if (name.empty()) {
|
||||
IE_THROW(NotFound) << "Failed to set blob with empty name";
|
||||
}
|
||||
if (!userBlob) IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'";
|
||||
if (!userBlob)
|
||||
IE_THROW(NotAllocated) << "Failed to set empty blob with name: \'" << name << "\'";
|
||||
const bool compoundBlobPassed = userBlob->is<CompoundBlob>();
|
||||
const bool remoteBlobPassed = userBlob->is<RemoteBlob>();
if (!compoundBlobPassed && !remoteBlobPassed && userBlob->buffer() == nullptr)
|
||||
IE_THROW(NotAllocated) << "Input data was not allocated. Input name: \'" << name << "\'";
|
||||
if (userBlob->size() == 0) {
|
||||
@ -66,7 +66,8 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us
|
||||
// ilavreno: the condition below is obsolete, but we need an exact list of precisions
|
||||
// which are supports by G-API preprocessing
|
||||
if (foundInput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
|
||||
IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user input precision";
|
||||
IE_THROW(ParameterMismatch)
|
||||
<< "Failed to set Blob with precision not corresponding to user input precision";
|
||||
}
|
||||
|
||||
auto& devBlob = _deviceInputs[name];
|
||||
@ -79,10 +80,11 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us
|
||||
addInputPreProcessingFor(name, userBlob, devBlob ? devBlob : _inputs[name]);
|
||||
} else {
|
||||
size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
|
||||
? InferenceEngine::details::product(foundInput->getTensorDesc().getDims())
|
||||
: 1;
|
||||
? InferenceEngine::details::product(foundInput->getTensorDesc().getDims())
|
||||
: 1;
|
||||
if (dataSize != inputSize) {
|
||||
IE_THROW() << "Input blob size is not equal network input size (" << dataSize << "!=" << inputSize << ").";
|
||||
IE_THROW() << "Input blob size is not equal network input size (" << dataSize << "!=" << inputSize
|
||||
<< ").";
|
||||
}
|
||||
_inputs[name] = userBlob;
|
||||
devBlob = userBlob;
|
||||
@ -92,13 +94,15 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& us
|
||||
IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing";
|
||||
}
|
||||
size_t outputSize = foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR
|
||||
? details::product(foundOutput->getTensorDesc().getDims()) :
|
||||
1;
|
||||
? details::product(foundOutput->getTensorDesc().getDims())
|
||||
: 1;
|
||||
if (dataSize != outputSize) {
|
||||
IE_THROW() << "Output blob size is not equal network output size (" << dataSize << "!=" << outputSize << ").";
|
||||
IE_THROW() << "Output blob size is not equal network output size (" << dataSize << "!=" << outputSize
|
||||
<< ").";
|
||||
}
|
||||
if (foundOutput->getPrecision() != userBlob->getTensorDesc().getPrecision()) {
|
||||
IE_THROW(ParameterMismatch) << "Failed to set Blob with precision not corresponding to user output precision";
|
||||
IE_THROW(ParameterMismatch)
|
||||
<< "Failed to set Blob with precision not corresponding to user output precision";
|
||||
}
|
||||
// ilavreno: this condition is valid for most plugins except MYRIAD
|
||||
// it is able to perform layout conversion for output blob dynamically
|
||||
@ -114,7 +118,7 @@ Blob::Ptr IInferRequestInternal::GetBlob(const std::string& name) {
|
||||
Blob::Ptr data;
|
||||
InputInfo::Ptr foundInput;
|
||||
DataPtr foundOutput;
|
||||
const SizeVector oneVector = { 1 };
|
||||
const SizeVector oneVector = {1};
|
||||
if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
|
||||
// ROI blob is returned only if it was set previously. Otherwise default blob is returned.
|
||||
auto it = _preProcData.find(name);
|
||||
@ -122,10 +126,11 @@ Blob::Ptr IInferRequestInternal::GetBlob(const std::string& name) {
|
||||
data = it->second->getRoiBlob();
|
||||
} else {
|
||||
data = _inputs[name];
|
||||
checkBlob(data, name, true,
|
||||
foundInput->getTensorDesc().getLayout() != SCALAR
|
||||
? foundInput->getTensorDesc().getDims()
|
||||
: oneVector);
|
||||
checkBlob(
|
||||
data,
|
||||
name,
|
||||
true,
|
||||
foundInput->getTensorDesc().getLayout() != SCALAR ? foundInput->getTensorDesc().getDims() : oneVector);
|
||||
|
||||
auto& devBlob = _deviceInputs[name];
|
||||
if (preProcessingRequired(foundInput, data, devBlob)) {
|
||||
@ -135,10 +140,11 @@ Blob::Ptr IInferRequestInternal::GetBlob(const std::string& name) {
|
||||
}
|
||||
} else {
|
||||
data = _outputs[name];
|
||||
checkBlob(data, name, false,
|
||||
foundOutput->getTensorDesc().getLayout() != SCALAR
|
||||
? foundOutput->getTensorDesc().getDims()
|
||||
: oneVector);
|
||||
checkBlob(
|
||||
data,
|
||||
name,
|
||||
false,
|
||||
foundOutput->getTensorDesc().getLayout() != SCALAR ? foundOutput->getTensorDesc().getDims() : oneVector);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
@ -147,7 +153,7 @@ void IInferRequestInternal::SetBlob(const std::string& name, const Blob::Ptr& da
|
||||
InputInfo::Ptr foundInput;
|
||||
DataPtr foundOutput;
|
||||
if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) {
|
||||
foundInput->getPreProcess() = copyPreProcess(info);
} else {
|
||||
IE_THROW() << "Pre-process can't be set to output blob";
|
||||
}
|
||||
@ -201,17 +207,21 @@ void IInferRequestInternal::execDataPreprocessing(InferenceEngine::BlobMap& prep
|
||||
}
|
||||
}
|
||||
|
||||
bool IInferRequestInternal::findInputAndOutputBlobByName(const std::string& name, InputInfo::Ptr& foundInput, DataPtr& foundOutput) const {
|
||||
bool IInferRequestInternal::findInputAndOutputBlobByName(const std::string& name,
|
||||
InputInfo::Ptr& foundInput,
|
||||
DataPtr& foundOutput) const {
|
||||
foundInput = nullptr;
|
||||
foundOutput = nullptr;
|
||||
if (_networkOutputs.empty()) {
|
||||
IE_THROW() << "Internal error: network outputs is not set";
|
||||
}
|
||||
auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs),
|
||||
[&](const std::pair<std::string, InputInfo::Ptr>& pair) {
|
||||
return pair.first == name;
|
||||
});
|
||||
auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs),
|
||||
auto foundInputPair = std::find_if(std::begin(_networkInputs),
|
||||
std::end(_networkInputs),
|
||||
[&](const std::pair<std::string, InputInfo::Ptr>& pair) {
|
||||
return pair.first == name;
|
||||
});
|
||||
auto foundOutputPair = std::find_if(std::begin(_networkOutputs),
|
||||
std::end(_networkOutputs),
|
||||
[&](const std::pair<std::string, DataPtr>& pair) {
|
||||
return pair.first == name;
|
||||
});
|
||||
@ -229,7 +239,10 @@ bool IInferRequestInternal::findInputAndOutputBlobByName(const std::string& name
|
||||
return retVal;
|
||||
}
|
||||
|
||||
void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string& name, bool isInput, const SizeVector& refDims) const {
|
||||
void IInferRequestInternal::checkBlob(const Blob::Ptr& blob,
|
||||
const std::string& name,
|
||||
bool isInput,
|
||||
const SizeVector& refDims) const {
|
||||
std::string bType = isInput ? "Input" : "Output";
|
||||
std::string sType = isInput ? "input" : "output";
|
||||
std::string strNotAllocated(bType + " data was not allocated.");
|
||||
@ -242,19 +255,19 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string&
|
||||
if (refDims.empty()) {
|
||||
SizeVector dims;
|
||||
if (isInput) {
|
||||
auto foundInputPair = std::find_if(std::begin(_networkInputs), std::end(_networkInputs),
|
||||
[&](const std::pair<std::string, InputInfo::Ptr>& pair) {
|
||||
return pair.first == name;
|
||||
});
|
||||
auto foundInputPair = std::find_if(std::begin(_networkInputs),
|
||||
std::end(_networkInputs),
|
||||
[&](const std::pair<std::string, InputInfo::Ptr>& pair) {
|
||||
return pair.first == name;
|
||||
});
|
||||
if (foundInputPair == std::end(_networkInputs)) {
|
||||
IE_THROW(NotFound) << "Failed to find input with name: \'" << name << "\'";
|
||||
}
|
||||
dims = foundInputPair->second->getTensorDesc().getDims();
|
||||
refSize = foundInputPair->second->getTensorDesc().getLayout() != SCALAR
|
||||
? details::product(dims)
|
||||
: 1;
|
||||
refSize = foundInputPair->second->getTensorDesc().getLayout() != SCALAR ? details::product(dims) : 1;
|
||||
} else {
|
||||
auto foundOutputPair = std::find_if(std::begin(_networkOutputs), std::end(_networkOutputs),
|
||||
auto foundOutputPair = std::find_if(std::begin(_networkOutputs),
|
||||
std::end(_networkOutputs),
|
||||
[&](const std::pair<std::string, DataPtr>& pair) {
|
||||
return pair.first == name;
|
||||
});
|
||||
@ -262,9 +275,7 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string&
|
||||
IE_THROW(NotFound) << "Failed to find output with name: \'" << name << "\'";
|
||||
}
|
||||
dims = foundOutputPair->second->getTensorDesc().getDims();
|
||||
refSize = foundOutputPair->second->getTensorDesc().getLayout() != SCALAR
|
||||
? details::product(dims)
|
||||
: 1;
|
||||
refSize = foundOutputPair->second->getTensorDesc().getLayout() != SCALAR ? details::product(dims) : 1;
|
||||
}
|
||||
} else {
|
||||
refSize = details::product(refDims);
|
||||
@ -274,7 +285,8 @@ void IInferRequestInternal::checkBlob(const Blob::Ptr& blob, const std::string&
|
||||
IE_THROW() << strNotMatched + ": got " << blob->size() << " expecting " << refSize;
|
||||
}
|
||||
const bool remoteBlobPassed = blob->is<RemoteBlob>();
|
||||
if (!remoteBlobPassed && blob->buffer() == nullptr) IE_THROW() << strNotAllocated;
|
||||
if (!remoteBlobPassed && blob->buffer() == nullptr)
|
||||
IE_THROW() << strNotAllocated;
|
||||
}
|
||||
|
||||
void IInferRequestInternal::checkBlobs() {
|
||||
@ -286,11 +298,14 @@ void IInferRequestInternal::checkBlobs() {
|
||||
}
|
||||
}
|
||||
|
||||
void IInferRequestInternal::setPointerToExecutableNetworkInternal(const std::shared_ptr<IExecutableNetworkInternal>& exeNetwork) {
|
||||
void IInferRequestInternal::setPointerToExecutableNetworkInternal(
|
||||
const std::shared_ptr<IExecutableNetworkInternal>& exeNetwork) {
|
||||
_exeNetwork = exeNetwork;
|
||||
}
|
||||
|
||||
bool IInferRequestInternal::preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& userBlob, const Blob::Ptr& deviceBlob) {
|
||||
bool IInferRequestInternal::preProcessingRequired(const InputInfo::Ptr& info,
|
||||
const Blob::Ptr& userBlob,
|
||||
const Blob::Ptr& deviceBlob) {
|
||||
// pre-processing is required if:
|
||||
// 1. resize algorithm is specified (resize required)
|
||||
// 2. color format specified:
|
||||
@ -305,30 +320,34 @@ bool IInferRequestInternal::preProcessingRequired(const InputInfo::Ptr& info, co
|
||||
const auto networkColorFormat = ColorFormat::BGR;
|
||||
const bool colorFormatSpecified = inputColorFormat != ColorFormat::RAW;
|
||||
|
||||
auto blob_layout = [](const Blob::Ptr& b) { return b->getTensorDesc().getLayout(); };
|
||||
auto blob_prec = [](const Blob::Ptr& b) { return b->getTensorDesc().getPrecision();};
|
||||
auto blob_layout = [](const Blob::Ptr& b) {
|
||||
return b->getTensorDesc().getLayout();
|
||||
};
|
||||
auto blob_prec = [](const Blob::Ptr& b) {
|
||||
return b->getTensorDesc().getPrecision();
|
||||
};
|
||||
|
||||
auto dst_layout = deviceBlob ? blob_layout(deviceBlob) : info->getLayout();
|
||||
auto dst_prec = deviceBlob ? blob_prec(deviceBlob) : info->getPrecision();
|
||||
//FIXME: remove the first part to allow any needed conversion?
|
||||
const bool need_layout_conv = (colorFormatSpecified || deviceBlob) &&
|
||||
(blob_layout(userBlob) != dst_layout);
|
||||
// FIXME: remove the first part to allow any needed conversion?
|
||||
const bool need_layout_conv = (colorFormatSpecified || deviceBlob) && (blob_layout(userBlob) != dst_layout);
|
||||
|
||||
return preProcessInfo.getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE ||
|
||||
(colorFormatSpecified && inputColorFormat != networkColorFormat) ||
|
||||
need_layout_conv ||
|
||||
(blob_prec(userBlob) != dst_prec);
|
||||
(colorFormatSpecified && inputColorFormat != networkColorFormat) || need_layout_conv ||
|
||||
(blob_prec(userBlob) != dst_prec);
|
||||
}
|
||||
|
||||
void IInferRequestInternal::addInputPreProcessingFor(const std::string& name, Blob::Ptr const& from, const Blob::Ptr& to) {
|
||||
void IInferRequestInternal::addInputPreProcessingFor(const std::string& name,
|
||||
Blob::Ptr const& from,
|
||||
const Blob::Ptr& to) {
|
||||
auto ppDataIt = _preProcData.find(name);
|
||||
if (ppDataIt == _preProcData.end()) {
|
||||
ppDataIt = (_preProcData.emplace(name, CreatePreprocDataHelper())).first;
|
||||
}
|
||||
|
||||
auto& preproc_ptr = ppDataIt->second;
|
||||
preproc_ptr->isApplicable(from, to);
// Stores the given blob as ROI blob. It will be used to fill in network input
|
||||
// during pre-processing
|
||||
preproc_ptr->setRoiBlob(from);
|
||||
|
@ -7,20 +7,20 @@
|
||||
* @file ie_iplugin_internal.hpp
|
||||
*/
|
||||
|
||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||
#include <ie_iextension.h>
|
||||
#include <ie_input_info.hpp>
|
||||
#include <ie_icore.hpp>
|
||||
#include <ie_parameter.hpp>
|
||||
#include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
|
||||
|
||||
#include <blob_factory.hpp>
|
||||
|
||||
#include <istream>
|
||||
#include <fstream>
|
||||
#include <istream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "blob_factory.hpp"
|
||||
#include "ie_icore.hpp"
|
||||
#include "ie_iextension.h"
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ie_parameter.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
PreProcessInfo copyPreProcess(const PreProcessInfo& from) {
|
||||
@ -99,21 +99,24 @@ void IInferencePlugin::SetName(const std::string& pluginName) noexcept {
|
||||
_pluginName = pluginName;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(
|
||||
const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
return LoadNetwork(network, config, nullptr);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template <typename T>
|
||||
std::map<std::string, std::shared_ptr<const T>> const_map_cast(const std::map<std::string, std::shared_ptr<T>>& map) {
|
||||
std::map<std::string, std::shared_ptr<const T>> res;
|
||||
for (auto&& v : map) res.emplace(v.first, std::const_pointer_cast<const T>(v.second));
|
||||
for (auto&& v : map)
|
||||
res.emplace(v.first, std::const_pointer_cast<const T>(v.second));
|
||||
return res;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config,
|
||||
const std::shared_ptr<RemoteContext>& context) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(
|
||||
const CNNNetwork& network,
|
||||
const std::map<std::string, std::string>& config,
|
||||
const std::shared_ptr<RemoteContext>& context) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> impl;
|
||||
if (nullptr == context) {
|
||||
impl = LoadExeNetworkImpl(network, config);
|
||||
@ -126,8 +129,9 @@ std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(const
|
||||
return impl;
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(const std::string& modelPath,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadNetwork(
|
||||
const std::string& modelPath,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
auto cnnNet = GetCore()->ReadNetwork(modelPath, std::string());
|
||||
return GetCore()->LoadNetwork(cnnNet, GetName(), config);
|
||||
}
|
||||
@ -140,13 +144,11 @@ void IInferencePlugin::SetConfig(const std::map<std::string, std::string>&) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
Parameter IInferencePlugin::GetConfig(const std::string&,
|
||||
const std::map<std::string, Parameter>&) const {
|
||||
Parameter IInferencePlugin::GetConfig(const std::string&, const std::map<std::string, Parameter>&) const {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
Parameter IInferencePlugin::GetMetric(const std::string&,
|
||||
const std::map<std::string, Parameter>&) const {
|
||||
Parameter IInferencePlugin::GetMetric(const std::string&, const std::map<std::string, Parameter>&) const {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
@ -158,8 +160,9 @@ RemoteContext::Ptr IInferencePlugin::GetDefaultContext(const ParamMap&) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(const std::string& modelFileName,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(
|
||||
const std::string& modelFileName,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
std::ifstream blobFile(modelFileName, std::ios::binary);
|
||||
|
||||
if (!blobFile.is_open()) {
|
||||
@ -169,15 +172,17 @@ std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(cons
|
||||
return ImportNetwork(blobFile, config);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(std::istream& networkModel,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(
|
||||
std::istream& networkModel,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(std::istream& networkModel,
|
||||
const std::shared_ptr<RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
IE_THROW(NotImplemented);
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::ImportNetwork(
|
||||
std::istream& networkModel,
|
||||
const std::shared_ptr<RemoteContext>& context,
|
||||
const std::map<std::string, std::string>& config) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
void IInferencePlugin::SetCore(std::weak_ptr<ICore> core) {
|
||||
@ -194,14 +199,16 @@ QueryNetworkResult IInferencePlugin::QueryNetwork(const CNNNetwork& network,
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadExeNetworkImpl(const CNNNetwork&,
|
||||
const std::map<std::string, std::string>&) {
|
||||
IE_THROW(NotImplemented);
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadExeNetworkImpl(
|
||||
const CNNNetwork&,
|
||||
const std::map<std::string, std::string>&) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadExeNetworkImpl(const CNNNetwork&,
|
||||
const std::shared_ptr<RemoteContext>&,
|
||||
const std::map<std::string, std::string>&) {
|
||||
std::shared_ptr<IExecutableNetworkInternal> IInferencePlugin::LoadExeNetworkImpl(
|
||||
const CNNNetwork&,
|
||||
const std::shared_ptr<RemoteContext>&,
|
||||
const std::map<std::string, std::string>&) {
|
||||
IE_THROW(NotImplemented);
|
||||
}
|
||||
|
||||
@ -215,4 +222,4 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptr<IExecutableNetwor
|
||||
exeNetwork->SetPointerToPlugin(shared_from_this());
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
||||
} // namespace InferenceEngine
|
||||
|
@ -15,7 +15,7 @@ void IVariableStateInternal::Reset() {
IE_THROW(NotImplemented);
}

void IVariableStateInternal::SetState(const Blob::Ptr& newState) {
state = newState;
}

@ -91,8 +91,16 @@ static inline void mm_store_interleave(float* ptr, __m128 a, __m128 b, __m128 c)
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W) {
|
||||
void blob_copy_4d_split_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int h = 0; h < H; h++) {
|
||||
const uint8_t* src = src_ptr + n * N_src_stride + h * H_src_stride;
|
||||
@ -119,8 +127,16 @@ void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_4d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W) {
|
||||
void blob_copy_4d_split_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int h = 0; h < H; h++) {
|
||||
const float* src = src_ptr + n * N_src_stride + h * H_src_stride;
|
||||
@ -147,8 +163,16 @@ void blob_copy_4d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W) {
|
||||
void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int h = 0; h < H; h++) {
|
||||
const uint8_t* src0 = src_ptr + n * N_src_stride + 0 * C_src_stride + h * H_src_stride;
|
||||
@ -176,8 +200,16 @@ void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_4d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W) {
|
||||
void blob_copy_4d_merge_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int h = 0; h < H; h++) {
|
||||
const float* src0 = src_ptr + n * N_src_stride + 0 * C_src_stride + h * H_src_stride;
|
||||
@ -205,9 +237,19 @@ void blob_copy_4d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride,
|
||||
size_t C_dst_stride, int N, int D, int H, int W) {
|
||||
void blob_copy_5d_split_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int d = 0; d < D; d++) {
|
||||
for (int h = 0; h < H; h++) {
|
||||
@ -236,9 +278,19 @@ void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_5d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride,
|
||||
size_t C_dst_stride, int N, int D, int H, int W) {
|
||||
void blob_copy_5d_split_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int d = 0; d < D; d++) {
|
||||
for (int h = 0; h < H; h++) {
|
||||
@ -267,9 +319,19 @@ void blob_copy_5d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride,
|
||||
size_t H_dst_stride, int N, int D, int H, int W) {
|
||||
void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int d = 0; d < D; d++) {
|
||||
for (int h = 0; h < H; h++) {
|
||||
@ -302,9 +364,19 @@ void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_
|
||||
}
|
||||
}
|
||||
|
||||
void blob_copy_5d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride,
|
||||
size_t H_dst_stride, int N, int D, int H, int W) {
|
||||
void blob_copy_5d_merge_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W) {
|
||||
for (int n = 0; n < N; n++)
|
||||
for (int d = 0; d < D; d++) {
|
||||
for (int h = 0; h < H; h++) {
|
||||
|
@ -15,32 +15,104 @@ namespace InferenceEngine {
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
void blob_copy_4d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W);
|
||||
void blob_copy_4d_split_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_4d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t N_dst_stride, size_t H_dst_stride, size_t C_dst_stride, int N, int H, int W);
|
||||
void blob_copy_4d_split_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W);
|
||||
void blob_copy_4d_merge_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_4d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t H_src_stride,
|
||||
size_t C_src_stride, size_t N_dst_stride, size_t H_dst_stride, int N, int H, int W);
|
||||
void blob_copy_4d_merge_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_5d_split_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride,
|
||||
size_t C_dst_stride, int N, int D, int H, int W);
|
||||
void blob_copy_5d_split_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_5d_split_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t N_dst_stride, size_t D_dst_stride, size_t H_dst_stride,
|
||||
size_t C_dst_stride, int N, int D, int H, int W);
|
||||
void blob_copy_5d_split_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
size_t C_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr, uint8_t* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride,
|
||||
size_t H_dst_stride, int N, int D, int H, int W);
|
||||
void blob_copy_5d_merge_u8c3(const uint8_t* src_ptr,
|
||||
uint8_t* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
void blob_copy_5d_merge_f32c3(const float* src_ptr, float* dst_ptr, size_t N_src_stride, size_t D_src_stride,
|
||||
size_t H_src_stride, size_t C_src_stride, size_t N_dst_stride, size_t D_dst_stride,
|
||||
size_t H_dst_stride, int N, int D, int H, int W);
|
||||
void blob_copy_5d_merge_f32c3(const float* src_ptr,
|
||||
float* dst_ptr,
|
||||
size_t N_src_stride,
|
||||
size_t D_src_stride,
|
||||
size_t H_src_stride,
|
||||
size_t C_src_stride,
|
||||
size_t N_dst_stride,
|
||||
size_t D_dst_stride,
|
||||
size_t H_dst_stride,
|
||||
int N,
|
||||
int D,
|
||||
int H,
|
||||
int W);
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -2,12 +2,12 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "ie_blob.h"

#include <memory>
#include <utility>
#include <vector>

#include "ie_blob.h"

namespace InferenceEngine {

Blob::Ptr Blob::createROI(const ROI&) const {

@ -3,13 +3,19 @@
//

#include "ie_cache_guard.hpp"

#include "ie_common.h"

namespace InferenceEngine {

CacheGuardEntry::CacheGuardEntry(CacheGuard& cacheGuard, const std::string& hash,
std::shared_ptr<std::mutex> m, std::atomic_int& refCount):
m_cacheGuard(cacheGuard), m_hash(hash), m_mutex(m), m_refCount(refCount) {
CacheGuardEntry::CacheGuardEntry(CacheGuard& cacheGuard,
const std::string& hash,
std::shared_ptr<std::mutex> m,
std::atomic_int& refCount)
: m_cacheGuard(cacheGuard),
m_hash(hash),
m_mutex(m),
m_refCount(refCount) {
// Don't lock mutex right here for exception-safe considerations
m_refCount++;
}
@ -36,8 +42,8 @@ std::unique_ptr<CacheGuardEntry> CacheGuard::getHashLock(const std::string& hash
std::unique_ptr<CacheGuardEntry> res;
try {
// TODO: use std::make_unique when migrated to C++14
res = std::unique_ptr<CacheGuardEntry>(
new CacheGuardEntry(*this, hash, data.m_mutexPtr, data.m_itemRefCounter));
res =
std::unique_ptr<CacheGuardEntry>(new CacheGuardEntry(*this, hash, data.m_mutexPtr, data.m_itemRefCounter));
} catch (...) {
// In case of exception, we shall remove hash entry if it is not used
if (data.m_itemRefCounter == 0) {
@ -45,15 +51,15 @@ std::unique_ptr<CacheGuardEntry> CacheGuard::getHashLock(const std::string& hash
}
throw;
}
lock.unlock(); // can unlock table lock here, as refCounter is positive and nobody can remove entry
res->performLock(); // in case of exception, 'res' will be destroyed and item will be cleaned up from table
lock.unlock();  // can unlock table lock here, as refCounter is positive and nobody can remove entry
res->performLock();  // in case of exception, 'res' will be destroyed and item will be cleaned up from table
return res;
}

void CacheGuard::checkForRemove(const std::string& hash) {
std::lock_guard<std::mutex> lock(m_tableMutex);
if (m_table.count(hash)) {
auto &data = m_table[hash];
auto& data = m_table[hash];
if (data.m_itemRefCounter == 0) {
// Nobody is using this and nobody is waiting for it - can be removed
m_table.erase(hash);
@ -10,11 +10,11 @@
* @file ie_cache_guard.hpp
*/

#include <string>
#include <map>
#include <mutex>
#include <memory>
#include <atomic>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

namespace InferenceEngine {
@ -36,8 +36,10 @@ public:
* @param m Shared pointer to mutex for internal locking
* @param refCount Reference counter. Will be decremented on CacheGuardEntry destruction
*/
CacheGuardEntry(CacheGuard& cacheGuard, const std::string& hash,
std::shared_ptr<std::mutex> m, std::atomic_int& refCount);
CacheGuardEntry(CacheGuard& cacheGuard,
const std::string& hash,
std::shared_ptr<std::mutex> m,
std::atomic_int& refCount);
CacheGuardEntry(const CacheGuardEntry&) = delete;
CacheGuardEntry& operator=(const CacheGuardEntry&) = delete;

@ -106,16 +108,14 @@ public:

private:
struct Item {
std::shared_ptr<std::mutex> m_mutexPtr { std::make_shared<std::mutex>() };
std::shared_ptr<std::mutex> m_mutexPtr{std::make_shared<std::mutex>()};
// Reference counter for item usage
std::atomic_int m_itemRefCounter {0};
std::atomic_int m_itemRefCounter{0};

Item() = default;
Item(const Item& other): m_mutexPtr(other.m_mutexPtr),
m_itemRefCounter(other.m_itemRefCounter.load()) {}
Item(const Item& other) : m_mutexPtr(other.m_mutexPtr), m_itemRefCounter(other.m_itemRefCounter.load()) {}
Item& operator=(const Item& other) = delete;
Item(Item&& other): m_mutexPtr(std::move(other.m_mutexPtr)),
m_itemRefCounter(other.m_itemRefCounter.load()) {}
Item(Item&& other) : m_mutexPtr(std::move(other.m_mutexPtr)), m_itemRefCounter(other.m_itemRefCounter.load()) {}
Item& operator=(Item&& other) = delete;
};
std::mutex m_tableMutex;

@ -9,12 +9,13 @@
*/
#pragma once

#include <memory>
#include <fstream>
#include <string>
#include <functional>
#include "ie_api.h"
#include <memory>
#include <string>

#include "file_utils.h"
#include "ie_api.h"

namespace InferenceEngine {

@ -2,21 +2,21 @@
// SPDX-License-Identifier: Apache-2.0
//

#include <string>
#include <vector>
#include <tuple>
#include <memory>
#include <map>
#include "ie_common.h"

#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include <ie_common.h>
#include <ie_blob.h>
#include <ie_parameter.hpp>
#include <ie_iextension.h>
#include <ie_extension.h>
#include <exec_graph_info.hpp>

#include <ngraph/opsets/opset.hpp>
#include "exec_graph_info.hpp"
#include "ie_blob.h"
#include "ie_extension.h"
#include "ie_iextension.h"
#include "ie_parameter.hpp"
#include "ngraph/opsets/opset.hpp"

namespace ExecGraphInfoSerialization {
//
@ -57,21 +57,37 @@ namespace details {
|
||||
void Rethrow() {
|
||||
try {
|
||||
throw;
|
||||
} catch (const GeneralError& e) {throw e;}
|
||||
catch (const NotImplemented& e) {throw e;}
|
||||
catch (const NetworkNotLoaded& e) {throw e;}
|
||||
catch (const ParameterMismatch& e) {throw e;}
|
||||
catch (const NotFound& e) {throw e;}
|
||||
catch (const OutOfBounds& e) {throw e;}
|
||||
catch (const Unexpected& e) {throw e;}
|
||||
catch (const RequestBusy& e) {throw e;}
|
||||
catch (const ResultNotReady& e) {throw e;}
|
||||
catch (const NotAllocated& e) {throw e;}
|
||||
catch (const InferNotStarted& e) {throw e;}
|
||||
catch (const NetworkNotRead& e) {throw e;}
|
||||
catch (const InferCancelled& e) {throw e;}
|
||||
catch (const std::exception& e) {IE_THROW() << e.what();}
|
||||
catch(...) {IE_THROW(Unexpected);}
|
||||
} catch (const GeneralError& e) {
|
||||
throw e;
|
||||
} catch (const NotImplemented& e) {
|
||||
throw e;
|
||||
} catch (const NetworkNotLoaded& e) {
|
||||
throw e;
|
||||
} catch (const ParameterMismatch& e) {
|
||||
throw e;
|
||||
} catch (const NotFound& e) {
|
||||
throw e;
|
||||
} catch (const OutOfBounds& e) {
|
||||
throw e;
|
||||
} catch (const Unexpected& e) {
|
||||
throw e;
|
||||
} catch (const RequestBusy& e) {
|
||||
throw e;
|
||||
} catch (const ResultNotReady& e) {
|
||||
throw e;
|
||||
} catch (const NotAllocated& e) {
|
||||
throw e;
|
||||
} catch (const InferNotStarted& e) {
|
||||
throw e;
|
||||
} catch (const NetworkNotRead& e) {
|
||||
throw e;
|
||||
} catch (const InferCancelled& e) {
|
||||
throw e;
|
||||
} catch (const std::exception& e) {
|
||||
IE_THROW() << e.what();
|
||||
} catch (...) {
|
||||
IE_THROW(Unexpected);
|
||||
}
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
@ -104,7 +120,8 @@ StatusCode InferenceEngineException::getStatus() const {
|
||||
} else if (dynamic_cast<const InferCancelled*>(this) != nullptr) {
|
||||
return INFER_CANCELLED;
|
||||
} else {
|
||||
assert(!"Unreachable"); return OK;
|
||||
assert(!"Unreachable");
|
||||
return OK;
|
||||
}
|
||||
}
|
||||
} // namespace details
|
||||
|
@ -36,7 +36,7 @@ TensorDesc verifyNV12BlobInput(const Blob::Ptr& y, const Blob::Ptr& uv) {
|
||||
// check Blob element size
|
||||
if (yMemoryBlob->element_size() != uvMemoryBlob->element_size()) {
|
||||
IE_THROW() << "Y and UV planes have different element sizes: " << yMemoryBlob->element_size()
|
||||
<< " != " << uvMemoryBlob->element_size();
|
||||
<< " != " << uvMemoryBlob->element_size();
|
||||
}
|
||||
|
||||
// check tensor descriptor parameters
|
||||
@ -64,7 +64,7 @@ TensorDesc verifyNV12BlobInput(const Blob::Ptr& y, const Blob::Ptr& uv) {
|
||||
const auto& uvDims = uvDesc.getDims();
|
||||
if (yDims.size() != 4 || uvDims.size() != 4) {
|
||||
IE_THROW() << "Y and UV planes dimension sizes must be 4, actual: " << yDims.size() << "(Y plane) and "
|
||||
<< uvDims.size() << "(UV plane)";
|
||||
<< uvDims.size() << "(UV plane)";
|
||||
}
|
||||
|
||||
// check batch size
|
||||
@ -83,13 +83,13 @@ TensorDesc verifyNV12BlobInput(const Blob::Ptr& y, const Blob::Ptr& uv) {
|
||||
// check height
|
||||
if (yDims[2] != 2 * uvDims[2]) {
|
||||
IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the UV plane), actual: "
|
||||
<< yDims[2] << "(Y plane) and " << uvDims[2] << "(UV plane)";
|
||||
<< yDims[2] << "(Y plane) and " << uvDims[2] << "(UV plane)";
|
||||
}
|
||||
|
||||
// check width
|
||||
if (yDims[3] != 2 * uvDims[3]) {
|
||||
IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: "
|
||||
<< yDims[3] << "(Y plane) and " << uvDims[3] << "(UV plane)";
|
||||
IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " << yDims[3]
|
||||
<< "(Y plane) and " << uvDims[3] << "(UV plane)";
|
||||
}
|
||||
|
||||
return {Precision::U8, {}, Layout::NCHW};
|
||||
@ -112,10 +112,10 @@ TensorDesc verifyI420BlobInput(const Blob::Ptr& y, const Blob::Ptr& u, const Blo
|
||||
auto uMemoryBlob = u->as<MemoryBlob>();
|
||||
auto vMemoryBlob = v->as<MemoryBlob>();
|
||||
// check Blob element size
|
||||
if (yMemoryBlob->element_size() != uMemoryBlob->element_size() || yMemoryBlob->element_size() != vMemoryBlob->element_size()) {
|
||||
if (yMemoryBlob->element_size() != uMemoryBlob->element_size() ||
|
||||
yMemoryBlob->element_size() != vMemoryBlob->element_size()) {
|
||||
IE_THROW() << "Y and UV planes have different element sizes: " << yMemoryBlob->element_size()
|
||||
<< " != " << uMemoryBlob->element_size()
|
||||
<< " != " << vMemoryBlob->element_size();
|
||||
<< " != " << uMemoryBlob->element_size() << " != " << vMemoryBlob->element_size();
|
||||
}
|
||||
|
||||
// check tensor descriptor parameters
|
||||
@ -152,8 +152,7 @@ TensorDesc verifyI420BlobInput(const Blob::Ptr& y, const Blob::Ptr& u, const Blo
|
||||
|
||||
if (yDims.size() != 4 || uDims.size() != 4 || vDims.size() != 4) {
|
||||
IE_THROW() << "Y,U and V planes dimension sizes must be 4, actual: " << yDims.size() << "(Y plane) and "
|
||||
<< uDims.size() << "(U plane) "
|
||||
<< vDims.size() << "(V plane)";
|
||||
<< uDims.size() << "(U plane) " << vDims.size() << "(V plane)";
|
||||
}
|
||||
|
||||
// check batch size
|
||||
@ -174,23 +173,23 @@ TensorDesc verifyI420BlobInput(const Blob::Ptr& y, const Blob::Ptr& u, const Blo
|
||||
|
||||
// check height
|
||||
if (yDims[2] != 2 * uDims[2]) {
|
||||
IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the U plane), actual: "
|
||||
<< yDims[2] << "(Y plane) and " << uDims[2] << "(U plane)";
|
||||
IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the U plane), actual: " << yDims[2]
|
||||
<< "(Y plane) and " << uDims[2] << "(U plane)";
|
||||
}
|
||||
|
||||
if (yDims[2] != 2 * vDims[2]) {
|
||||
IE_THROW() << "The height of the Y plane must be equal to (2 * the height of the UV plane), actual: "
|
||||
<< yDims[2] << "(Y plane) and " << vDims[2] << "(V plane)";
|
||||
<< yDims[2] << "(Y plane) and " << vDims[2] << "(V plane)";
|
||||
}
|
||||
|
||||
// check width
|
||||
if (yDims[3] != 2 * uDims[3]) {
|
||||
IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: "
|
||||
<< yDims[3] << "(Y plane) and " << uDims[3] << "(U plane)";
|
||||
IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " << yDims[3]
|
||||
<< "(Y plane) and " << uDims[3] << "(U plane)";
|
||||
}
|
||||
if (yDims[3] != 2 * vDims[3]) {
|
||||
IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: "
|
||||
<< yDims[3] << "(Y plane) and " << vDims[3] << "(V plane)";
|
||||
IE_THROW() << "The width of the Y plane must be equal to (2 * the width of the UV plane), actual: " << yDims[3]
|
||||
<< "(Y plane) and " << vDims[3] << "(V plane)";
|
||||
}
|
||||
|
||||
return {Precision::U8, {}, Layout::NCHW};
|
||||
@ -215,7 +214,8 @@ TensorDesc getBlobTensorDesc(const Blob::Ptr& blob) {
|
||||
TensorDesc verifyBatchedBlobInput(const std::vector<Blob::Ptr>& blobs) {
|
||||
// verify invariants
|
||||
if (blobs.empty()) {
|
||||
IE_THROW() << "BatchedBlob cannot be created from empty vector of Blob, Please, make sure vector contains at least one Blob";
|
||||
IE_THROW() << "BatchedBlob cannot be created from empty vector of Blob, Please, make sure vector contains at "
|
||||
"least one Blob";
|
||||
}
|
||||
|
||||
// Cannot create a compound blob from nullptr Blob objects
|
||||
@ -227,10 +227,9 @@ TensorDesc verifyBatchedBlobInput(const std::vector<Blob::Ptr>& blobs) {
|
||||
|
||||
const auto subBlobDesc = getBlobTensorDesc(blobs[0]);
|
||||
|
||||
if (std::any_of(blobs.begin(), blobs.end(),
|
||||
[&subBlobDesc](const Blob::Ptr& blob) {
|
||||
return getBlobTensorDesc(blob) != subBlobDesc;
|
||||
})) {
|
||||
if (std::any_of(blobs.begin(), blobs.end(), [&subBlobDesc](const Blob::Ptr& blob) {
|
||||
return getBlobTensorDesc(blob) != subBlobDesc;
|
||||
})) {
|
||||
IE_THROW() << "All blobs tensors should be equal";
|
||||
}
|
||||
|
||||
@ -272,9 +271,9 @@ TensorDesc verifyBatchedBlobInput(const std::vector<Blob::Ptr>& blobs) {
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
CompoundBlob::CompoundBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {}
|
||||
CompoundBlob::CompoundBlob(const TensorDesc& tensorDesc) : Blob(tensorDesc) {}
|
||||
|
||||
CompoundBlob::CompoundBlob(const std::vector<Blob::Ptr>& blobs): CompoundBlob(TensorDesc{}) {
|
||||
CompoundBlob::CompoundBlob(const std::vector<Blob::Ptr>& blobs) : CompoundBlob(TensorDesc{}) {
|
||||
// Cannot create a compound blob from nullptr Blob objects
|
||||
if (std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) {
|
||||
return blob == nullptr;
|
||||
@ -293,7 +292,7 @@ CompoundBlob::CompoundBlob(const std::vector<Blob::Ptr>& blobs): CompoundBlob(Te
|
||||
this->_blobs = blobs;
|
||||
}
|
||||
|
||||
CompoundBlob::CompoundBlob(std::vector<Blob::Ptr>&& blobs): CompoundBlob(TensorDesc{}) {
|
||||
CompoundBlob::CompoundBlob(std::vector<Blob::Ptr>&& blobs) : CompoundBlob(TensorDesc{}) {
|
||||
// Cannot create a compound blob from nullptr Blob objects
|
||||
if (std::any_of(blobs.begin(), blobs.end(), [](const Blob::Ptr& blob) {
|
||||
return blob == nullptr;
|
||||
@ -361,13 +360,11 @@ const std::shared_ptr<IAllocator>& CompoundBlob::getAllocator() const noexcept {
|
||||
return _allocator;
|
||||
};
|
||||
|
||||
NV12Blob::NV12Blob(const Blob::Ptr& y, const Blob::Ptr& uv)
|
||||
: CompoundBlob(verifyNV12BlobInput(y, uv)) {
|
||||
NV12Blob::NV12Blob(const Blob::Ptr& y, const Blob::Ptr& uv) : CompoundBlob(verifyNV12BlobInput(y, uv)) {
|
||||
this->_blobs = {y, uv};
|
||||
}
|
||||
|
||||
NV12Blob::NV12Blob(Blob::Ptr&& y, Blob::Ptr&& uv)
|
||||
: CompoundBlob(verifyNV12BlobInput(y, uv)) {
|
||||
NV12Blob::NV12Blob(Blob::Ptr&& y, Blob::Ptr&& uv) : CompoundBlob(verifyNV12BlobInput(y, uv)) {
|
||||
this->_blobs = {std::move(y), std::move(uv)};
|
||||
}
|
||||
|
||||
@ -409,8 +406,7 @@ I420Blob::I420Blob(const Blob::Ptr& y, const Blob::Ptr& u, const Blob::Ptr& v)
|
||||
this->_blobs = {y, u, v};
|
||||
}
|
||||
|
||||
I420Blob::I420Blob(Blob::Ptr&& y, Blob::Ptr&& u, Blob::Ptr&& v)
|
||||
: CompoundBlob(verifyI420BlobInput(y, u, v)) {
|
||||
I420Blob::I420Blob(Blob::Ptr&& y, Blob::Ptr&& u, Blob::Ptr&& v) : CompoundBlob(verifyI420BlobInput(y, u, v)) {
|
||||
this->_blobs = {std::move(y), std::move(u), std::move(v)};
|
||||
}
|
||||
|
||||
@ -458,13 +454,11 @@ Blob::Ptr I420Blob::createROI(const ROI& roi) const {
|
||||
return std::make_shared<I420Blob>(yRoiBlob, uRoiBlob, vRoiBlob);
|
||||
}
|
||||
|
||||
BatchedBlob::BatchedBlob(const std::vector<Blob::Ptr>& blobs)
|
||||
: CompoundBlob(verifyBatchedBlobInput(blobs)) {
|
||||
BatchedBlob::BatchedBlob(const std::vector<Blob::Ptr>& blobs) : CompoundBlob(verifyBatchedBlobInput(blobs)) {
|
||||
this->_blobs = blobs;
|
||||
}
|
||||
|
||||
BatchedBlob::BatchedBlob(std::vector<Blob::Ptr>&& blobs)
|
||||
: CompoundBlob(verifyBatchedBlobInput(blobs)) {
|
||||
BatchedBlob::BatchedBlob(std::vector<Blob::Ptr>&& blobs) : CompoundBlob(verifyBatchedBlobInput(blobs)) {
|
||||
this->_blobs = std::move(blobs);
|
||||
}
|
||||
|
||||
|
@ -2,32 +2,33 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include "ie_core.hpp"
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
#include <openvino/runtime/core.hpp>
|
||||
#include <ie_icore.hpp>
|
||||
#include <ngraph/opsets/opset.hpp>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/graph_util.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "compilation_context.hpp"
|
||||
#include "cpp/ie_plugin.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_cache_manager.hpp"
|
||||
#include "ie_cache_guard.hpp"
|
||||
#include "ie_itt.hpp"
|
||||
#include "file_utils.h"
|
||||
#include "ie_network_reader.hpp"
|
||||
#include "xml_parse_utils.h"
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
#include "file_utils.h"
|
||||
#include "ie_cache_guard.hpp"
|
||||
#include "ie_cache_manager.hpp"
|
||||
#include "ie_icore.hpp"
|
||||
#include "ie_itt.hpp"
|
||||
#include "ie_network_reader.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ngraph/graph_util.hpp"
|
||||
#include "ngraph/ngraph.hpp"
|
||||
#include "ngraph/opsets/opset.hpp"
|
||||
#include "ngraph/pass/constant_folding.hpp"
|
||||
#include "openvino/runtime/core.hpp"
|
||||
#include "xml_parse_utils.h"
|
||||
|
||||
using namespace InferenceEngine::PluginConfigParams;
|
||||
using namespace std::placeholders;
|
||||
@ -44,8 +45,8 @@ std::string parseXmlConfig(const std::string& xmlFile) {
|
||||
std::string xmlConfigFile_ = xmlFile;
|
||||
if (xmlConfigFile_.empty()) {
|
||||
// register plugins from default plugins.xml config
|
||||
FileUtils::FilePath xmlConfigFileDefault = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(),
|
||||
FileUtils::toFilePath("plugins.xml"));
|
||||
FileUtils::FilePath xmlConfigFileDefault =
|
||||
FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), FileUtils::toFilePath("plugins.xml"));
|
||||
xmlConfigFile_ = FileUtils::fromFilePath(xmlConfigFileDefault);
|
||||
}
|
||||
return xmlConfigFile_;
|
||||
@ -85,39 +86,40 @@ Parsed<T> parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
|
||||
return {deviceName_, config_};
|
||||
}
|
||||
|
||||
InferenceEngine::Parameter copyParameterValue(const InferenceEngine::Parameter & value) {
|
||||
InferenceEngine::Parameter copyParameterValue(const InferenceEngine::Parameter& value) {
|
||||
if (value.is<bool>()) {
|
||||
return { value.as<bool>() };
|
||||
return {value.as<bool>()};
|
||||
} else if (value.is<int>()) {
|
||||
return { value.as<int>() };
|
||||
return {value.as<int>()};
|
||||
} else if (value.is<unsigned int>()) {
|
||||
return { value.as<unsigned int>() };
|
||||
return {value.as<unsigned int>()};
|
||||
} else if (value.is<float>()) {
|
||||
return { value.as<float>() };
|
||||
return {value.as<float>()};
|
||||
} else if (value.is<std::string>()) {
|
||||
return { value.as<std::string>() };
|
||||
} else if (value.is<std::vector<std::string> >()) {
|
||||
return { value.as<std::vector<std::string> >() };
|
||||
} else if (value.is<std::vector<int> >()) {
|
||||
return { value.as<std::vector<int> >() };
|
||||
} else if (value.is<std::vector<float> >()) {
|
||||
return { value.as<std::vector<float> >() };
|
||||
} else if (value.is<std::vector<unsigned int> >()) {
|
||||
return { value.as<std::vector<unsigned int> >() };
|
||||
} else if (value.is<std::tuple<unsigned int, unsigned int, unsigned int> >()) {
|
||||
return { value.as<std::tuple<unsigned int, unsigned int, unsigned int> >() };
|
||||
} else if (value.is<std::tuple<unsigned int, unsigned int> >()) {
return { value.as<std::tuple<unsigned int, unsigned int> >() };
return {value.as<std::string>()};
} else if (value.is<std::vector<std::string>>()) {
return {value.as<std::vector<std::string>>()};
} else if (value.is<std::vector<int>>()) {
return {value.as<std::vector<int>>()};
} else if (value.is<std::vector<float>>()) {
return {value.as<std::vector<float>>()};
} else if (value.is<std::vector<unsigned int>>()) {
return {value.as<std::vector<unsigned int>>()};
} else if (value.is<std::tuple<unsigned int, unsigned int, unsigned int>>()) {
return {value.as<std::tuple<unsigned int, unsigned int, unsigned int>>()};
} else if (value.is<std::tuple<unsigned int, unsigned int>>()) {
return {value.as<std::tuple<unsigned int, unsigned int>>()};
}

return std::move(value);
}

template <typename F>
void allowNotImplemented(F && f) {
void allowNotImplemented(F&& f) {
try {
f();
} catch (const InferenceEngine::NotImplemented&) { }
} catch (const InferenceEngine::NotImplemented&) {
}
}

class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_this<InferenceEngine::ICore> {
@ -126,7 +128,7 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
class CoreConfig final {
public:
struct CacheConfig {
std::string _cacheDir;
std::string _cacheDir;
std::shared_ptr<InferenceEngine::ICacheManager> _cacheManager;
};

@ -137,7 +139,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
_cacheConfig._cacheDir = it->second;
if (!it->second.empty()) {
FileUtils::createDirectoryRecursive(it->second);
_cacheConfig._cacheManager = std::make_shared<InferenceEngine::FileStorageCacheManager>(std::move(it->second));
_cacheConfig._cacheManager =
std::make_shared<InferenceEngine::FileStorageCacheManager>(std::move(it->second));
} else {
_cacheConfig._cacheManager = nullptr;
}
@ -182,10 +185,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t

bool DeviceSupportsImportExport(const InferenceEngine::InferencePlugin& plugin) const {
std::vector<std::string> supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {});
auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(),
METRIC_KEY(IMPORT_EXPORT_SUPPORT));
bool supported = (it != supportedMetricKeys.end()) &&
plugin.GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), {});
auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(IMPORT_EXPORT_SUPPORT));
bool supported = (it != supportedMetricKeys.end()) && plugin.GetMetric(METRIC_KEY(IMPORT_EXPORT_SUPPORT), {});
return supported;
}

@ -198,11 +199,10 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
std::vector<std::string> supportedMetricKeys;
try {
// If plugin doesn't support 'SUPPORTED_METRICS' - treat it as config is not supported as well
supportedMetricKeys =
plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {}).as<std::vector<std::string>>();
} catch(...) {}
auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(),
METRIC_KEY(SUPPORTED_CONFIG_KEYS));
supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), {}).as<std::vector<std::string>>();
} catch (...) {
}
auto it = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(SUPPORTED_CONFIG_KEYS));
if (it != supportedMetricKeys.end()) {
std::vector<std::string> configKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS), {});
supported = std::find(configKeys.begin(), configKeys.end(), key) != configKeys.end();
@ -211,24 +211,25 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
}

InferenceEngine::SoExecutableNetworkInternal LoadNetworkImpl(const InferenceEngine::CNNNetwork& network,
InferenceEngine::InferencePlugin& plugin,
const std::map<std::string, std::string>& parsedConfig,
const InferenceEngine::RemoteContext::Ptr& context,
const std::string& blobID,
const std::string& modelPath = std::string(),
bool forceDisableCache = false) {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::Impl::LoadNetworkImpl");
InferenceEngine::InferencePlugin& plugin,
const std::map<std::string, std::string>& parsedConfig,
const InferenceEngine::RemoteContext::Ptr& context,
const std::string& blobID,
const std::string& modelPath = std::string(),
bool forceDisableCache = false) {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "CoreImpl::LoadNetworkImpl");
InferenceEngine::SoExecutableNetworkInternal execNetwork;
execNetwork = context ? plugin.LoadNetwork(network, context, parsedConfig) :
plugin.LoadNetwork(network, parsedConfig);
execNetwork =
context ? plugin.LoadNetwork(network, context, parsedConfig) : plugin.LoadNetwork(network, parsedConfig);
auto cacheManager = coreConfig.getCacheConfig()._cacheManager;
if (!forceDisableCache && cacheManager && DeviceSupportsImportExport(plugin)) {
try {
// need to export network for further import from "cache"
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Export");
cacheManager->writeCacheEntry(blobID, [&](std::ostream& networkStream) {
networkStream << InferenceEngine::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber,
InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath));
networkStream << InferenceEngine::CompiledBlobHeader(
InferenceEngine::GetInferenceEngineVersion()->buildNumber,
InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath));
execNetwork->Export(networkStream);
});
} catch (...) {
@ -239,20 +240,23 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
return execNetwork;
}

InferenceEngine::SoExecutableNetworkInternal LoadNetworkFromCache(const std::shared_ptr<InferenceEngine::ICacheManager>& cacheManager,
const std::string& blobId,
InferenceEngine::InferencePlugin& plugin,
const std::map<std::string, std::string>& config,
const InferenceEngine::RemoteContext::Ptr& context,
bool& networkIsImported,
const std::string& modelPath = std::string()) {
InferenceEngine::SoExecutableNetworkInternal LoadNetworkFromCache(
const std::shared_ptr<InferenceEngine::ICacheManager>& cacheManager,
const std::string& blobId,
InferenceEngine::InferencePlugin& plugin,
const std::map<std::string, std::string>& config,
const InferenceEngine::RemoteContext::Ptr& context,
bool& networkIsImported,
const std::string& modelPath = std::string()) {
InferenceEngine::SoExecutableNetworkInternal execNetwork;
struct HeaderException {};

IE_ASSERT(cacheManager != nullptr);
try {
cacheManager->readCacheEntry(blobId, [&](std::istream &networkStream) {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetworkFromCache::ReadStreamAndImport");
cacheManager->readCacheEntry(blobId, [&](std::istream& networkStream) {
OV_ITT_SCOPE(FIRST_INFERENCE,
InferenceEngine::itt::domains::IE_LT,
"Core::LoadNetworkFromCache::ReadStreamAndImport");
try {
InferenceEngine::CompiledBlobHeader header;
networkStream >> header;
@ -260,7 +264,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
// Build number mismatch, don't use this cache
throw InferenceEngine::NetworkNotRead("Version does not match");
}
if (header.getFileInfo() != InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)) {
if (header.getFileInfo() !=
InferenceEngine::NetworkCompilationContext::calculateFileInfo(modelPath)) {
// Original file is changed, don't use cache
throw InferenceEngine::NetworkNotRead("Original model file is changed");
}
@ -268,9 +273,8 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
throw HeaderException();
}

execNetwork = context ?
plugin.ImportNetwork(networkStream, context, config) :
plugin.ImportNetwork(networkStream, config);
execNetwork = context ? plugin.ImportNetwork(networkStream, context, config)
: plugin.ImportNetwork(networkStream, config);
networkIsImported = true;
});
} catch (const HeaderException&) {
@ -307,10 +311,9 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
}

// 2. replace it with DEVICE_ARCHITECTURE value
std::vector<std::string> supportedMetricKeys =
plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), getMetricConfig);
auto archIt = std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(),
METRIC_KEY(DEVICE_ARCHITECTURE));
std::vector<std::string> supportedMetricKeys = plugin.GetMetric(METRIC_KEY(SUPPORTED_METRICS), getMetricConfig);
auto archIt =
std::find(supportedMetricKeys.begin(), supportedMetricKeys.end(), METRIC_KEY(DEVICE_ARCHITECTURE));
if (archIt != supportedMetricKeys.end()) {
auto value = plugin.GetMetric(METRIC_KEY(DEVICE_ARCHITECTURE), getMetricConfig);
compileConfig[METRIC_KEY(DEVICE_ARCHITECTURE)] = value.as<std::string>();
@ -321,14 +324,16 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t
return compileConfig;
}

std::string CalculateNetworkHash(const InferenceEngine::CNNNetwork& network, const std::string& deviceFamily,
std::string CalculateNetworkHash(const InferenceEngine::CNNNetwork& network,
const std::string& deviceFamily,
const InferenceEngine::InferencePlugin& plugin,
const std::map<std::string, std::string>& config) const {
auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config);
return InferenceEngine::NetworkCompilationContext::computeHash(network, compileConfig);
}

std::string CalculateFileHash(const std::string& modelName, const std::string& deviceFamily,
std::string CalculateFileHash(const std::string& modelName,
const std::string& deviceFamily,
const InferenceEngine::InferencePlugin& plugin,
const std::map<std::string, std::string>& config) const {
auto compileConfig = CreateCompileConfig(plugin, deviceFamily, config);
@ -349,7 +354,8 @@ public:
~CoreImpl() override = default;

/**
* @brief Register plugins for devices which are located in .xml configuration file. The function supports UNICODE path
* @brief Register plugins for devices which are located in .xml configuration file. The function supports UNICODE
* path
* @param xmlConfigFile An .xml configuraion with device / plugin information
*/
void RegisterPluginsInRegistry(const std::string& xmlConfigFile) {
@ -366,7 +372,7 @@ public:
pugi::xml_node ieNode = xmlDoc.document_element();
pugi::xml_node devicesNode = ieNode.child("plugins");

FOREACH_CHILD(pluginNode, devicesNode, "plugin") {
FOREACH_CHILD (pluginNode, devicesNode, "plugin") {
std::string deviceName = GetStrAttr(pluginNode, "name");
FileUtils::FilePath pluginPath = FileUtils::toFilePath(GetStrAttr(pluginNode, "location").c_str());

@ -376,8 +382,10 @@ public:

// append IR library path for default IE plugins
{
FileUtils::FilePath absFilePath = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath);
if (FileUtils::fileExist(absFilePath)) pluginPath = absFilePath;
FileUtils::FilePath absFilePath =
FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath);
if (FileUtils::fileExist(absFilePath))
pluginPath = absFilePath;
}

// check properties
@ -385,7 +393,7 @@ public:
std::map<std::string, std::string> config;

if (propertiesNode) {
FOREACH_CHILD(propertyNode, propertiesNode, "property") {
FOREACH_CHILD (propertyNode, propertiesNode, "property") {
std::string key = GetStrAttr(propertyNode, "key");
std::string value = GetStrAttr(propertyNode, "value");
config[key] = value;
@ -397,8 +405,9 @@ public:
std::vector<FileUtils::FilePath> listOfExtentions;

if (extensionsNode) {
FOREACH_CHILD(extensionNode, extensionsNode, "extension") {
FileUtils::FilePath extensionLocation = FileUtils::toFilePath(GetStrAttr(extensionNode, "location").c_str());
FOREACH_CHILD (extensionNode, extensionsNode, "extension") {
FileUtils::FilePath extensionLocation =
FileUtils::toFilePath(GetStrAttr(extensionNode, "location").c_str());
listOfExtentions.push_back(extensionLocation);
}
}
@ -424,18 +433,20 @@ public:
}

InferenceEngine::CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath) const override {
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "Core::Impl::ReadNetwork from file");
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from file");
return InferenceEngine::details::ReadNetwork(modelPath, binPath, extensions);
}

InferenceEngine::CNNNetwork ReadNetwork(const std::string& model, const InferenceEngine::Blob::CPtr& weights) const override {
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "Core::Impl::ReadNetwork from memory");
InferenceEngine::CNNNetwork ReadNetwork(const std::string& model,
const InferenceEngine::Blob::CPtr& weights) const override {
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "CoreImpl::ReadNetwork from memory");
return InferenceEngine::details::ReadNetwork(model, weights, extensions);
}

// TODO: In future this method can be added to ICore interface
InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network, const InferenceEngine::RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network,
const InferenceEngine::RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::RemoteContext");
if (context == nullptr) {
IE_THROW() << "Remote context is null";
@ -458,9 +469,10 @@ public:
return res;
}

InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const InferenceEngine::CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
InferenceEngine::SoExecutableNetworkInternal LoadNetwork(
const InferenceEngine::CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::CNN");
bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
@ -485,9 +497,10 @@ public:
return res;
}

InferenceEngine::SoExecutableNetworkInternal LoadNetwork(const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
InferenceEngine::SoExecutableNetworkInternal LoadNetwork(
const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::LoadNetwork::Path");
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
auto plugin = GetCPPPluginByName(parsed._deviceName);
@ -497,8 +510,7 @@ public:
bool loadedFromCache = false;
auto hash = CalculateFileHash(modelPath, parsed._deviceName, plugin, parsed._config);
auto lock = cacheGuard.getHashLock(hash);
res = LoadNetworkFromCache(cacheManager, hash, plugin, parsed._config,
nullptr, loadedFromCache, modelPath);
res = LoadNetworkFromCache(cacheManager, hash, plugin, parsed._config, nullptr, loadedFromCache, modelPath);
if (!loadedFromCache) {
auto cnnNetwork = ReadNetwork(modelPath, std::string());
res = LoadNetworkImpl(cnnNetwork, plugin, parsed._config, nullptr, hash, modelPath);
@ -512,14 +524,17 @@ public:
return res;
}

InferenceEngine::SoExecutableNetworkInternal ImportNetwork(std::istream& networkModel, const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
InferenceEngine::SoExecutableNetworkInternal ImportNetwork(
std::istream& networkModel,
const std::string& deviceName,
const std::map<std::string, std::string>& config) override {
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
return GetCPPPluginByName(parsed._deviceName).ImportNetwork(networkModel, parsed._config);
}

InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network, const std::string& deviceName,
const std::map<std::string, std::string>& config) const override {
InferenceEngine::QueryNetworkResult QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) const override {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::QueryNetwork");
auto parsed = parseDeviceNameIntoConfig(deviceName, config);
auto res = GetCPPPluginByName(parsed._deviceName).QueryNetwork(network, parsed._config);
@ -591,10 +606,10 @@ public:
// plugin is not created by e.g. invalid env
} catch (const std::exception& ex) {
IE_THROW() << "An exception is thrown while trying to create the " << deviceName
<< " device and call GetMetric: " << ex.what();
<< " device and call GetMetric: " << ex.what();
} catch (...) {
IE_THROW() << "Unknown exception is thrown while trying to create the " << deviceName
<< " device and call GetMetric";
<< " device and call GetMetric";
}

if (devicesIDs.size() > 1) {
@ -615,7 +630,7 @@ public:
* @return Reference to a CPP plugin wrapper
*/
InferenceEngine::InferencePlugin GetCPPPluginByName(const std::string& deviceName) const {
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "Core::Impl::GetCPPPluginByName");
OV_ITT_SCOPE(FIRST_INFERENCE, InferenceEngine::itt::domains::IE_LT, "CoreImpl::GetCPPPluginByName");

std::lock_guard<std::mutex> lock(pluginsMutex);

@ -636,13 +651,13 @@ public:
plugin.SetName(deviceName);

// Set Inference Engine class reference to plugins
std::weak_ptr<InferenceEngine::ICore> mutableCore = std::const_pointer_cast<InferenceEngine::ICore>(
shared_from_this());
std::weak_ptr<InferenceEngine::ICore> mutableCore =
std::const_pointer_cast<InferenceEngine::ICore>(shared_from_this());
plugin.SetCore(mutableCore);
}

// Add registered extensions to new plugin
allowNotImplemented([&](){
allowNotImplemented([&]() {
for (const auto& ext : extensions) {
plugin.AddExtension(ext);
}
@ -669,10 +684,10 @@ public:

plugins[deviceName] = plugin;
} catch (const InferenceEngine::Exception& ex) {
IE_THROW() << "Failed to create plugin " << FileUtils::fromFilePath(desc.libraryLocation) << " for device " << deviceName
<< "\n"
<< "Please, check your environment\n"
<< ex.what() << "\n";
IE_THROW() << "Failed to create plugin " << FileUtils::fromFilePath(desc.libraryLocation)
<< " for device " << deviceName << "\n"
<< "Please, check your environment\n"
<< ex.what() << "\n";
}
}

@ -714,8 +729,10 @@ public:
{
pluginPath = FileUtils::makePluginLibraryName({}, FileUtils::toFilePath(pluginName.c_str()));

FileUtils::FilePath absFilePath = FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath);
if (FileUtils::fileExist(absFilePath)) pluginPath = absFilePath;
FileUtils::FilePath absFilePath =
FileUtils::makePath(InferenceEngine::getInferenceEngineLibraryPath(), pluginPath);
if (FileUtils::fileExist(absFilePath))
pluginPath = absFilePath;
}

PluginDescriptor desc = {pluginPath, {}, {}};
@ -795,7 +812,8 @@ public:
std::map<std::string, ngraph::OpSet> opsets = extension->getOpSets();
for (const auto& it : opsets) {
if (opsetNames.find(it.first) != opsetNames.end())
IE_THROW() << "Cannot add opset with name: " << it.first << ". Opset with the same name already exists.";
IE_THROW() << "Cannot add opset with name: " << it.first
<< ". Opset with the same name already exists.";
opsetNames.insert(it.first);
}

@ -803,7 +821,8 @@ public:
for (auto& plugin : plugins) {
try {
plugin.second.AddExtension(extension);
} catch (...) {}
} catch (...) {
}
}
extensions.emplace_back(extension);
}
@ -860,7 +879,6 @@ public:

} // namespace core_detail


namespace InferenceEngine {

DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) {
@ -893,7 +911,8 @@ std::vector<std::string> DeviceIDParser::getHeteroDevices(std::string fallbackDe
fallbackDevice.erase(0, pos + 1);
}

if (!fallbackDevice.empty()) deviceNames.push_back(fallbackDevice);
if (!fallbackDevice.empty())
deviceNames.push_back(fallbackDevice);

return deviceNames;
}
@ -915,7 +934,8 @@ std::vector<std::string> DeviceIDParser::getMultiDevices(std::string devicesList
devicesList.erase(0, pos + 1);
}

if (!devicesList.empty()) deviceNames.push_back(trim_request_info(devicesList));
if (!devicesList.empty())
deviceNames.push_back(trim_request_info(devicesList));

return deviceNames;
}
@ -935,8 +955,7 @@ std::map<std::string, Version> Core::GetVersions(const std::string& deviceName)
#ifdef ENABLE_UNICODE_PATH_SUPPORT

CNNNetwork Core::ReadNetwork(const std::wstring& modelPath, const std::wstring& binPath) const {
return ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath),
FileUtils::wStringtoMBCSstringChar(binPath));
return ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), FileUtils::wStringtoMBCSstringChar(binPath));
}

#endif
@ -949,22 +968,25 @@ CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights
return _impl->ReadNetwork(model, weights);
}

ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, const std::string& deviceName,
ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(network, deviceName, config);
return { exec, exec };
return {exec, exec};
}

ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network, RemoteContext::Ptr context,
ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network,
RemoteContext::Ptr context,
const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(network, context, config);
return { exec, exec };
return {exec, exec};
}

ExecutableNetwork Core::LoadNetwork(const std::string& modelPath, const std::string& deviceName,
ExecutableNetwork Core::LoadNetwork(const std::string& modelPath,
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(modelPath, deviceName, config);
return { exec, exec };
return {exec, exec};
}

RemoteContext::Ptr Core::CreateContext(const std::string& deviceName, const ParamMap& params) {
@ -999,16 +1021,13 @@ RemoteContext::Ptr Core::GetDefaultContext(const std::string& deviceName) {

void Core::AddExtension(IExtensionPtr extension, const std::string& deviceName_) {
if (deviceName_.find("HETERO") == 0) {
IE_THROW()
<< "HETERO device does not support extensions. Please, set extensions directly to fallback devices";
IE_THROW() << "HETERO device does not support extensions. Please, set extensions directly to fallback devices";
}
if (deviceName_.find("MULTI") == 0) {
IE_THROW()
<< "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
IE_THROW() << "MULTI device does not support extensions. Please, set extensions directly to fallback devices";
}
if (deviceName_.find("AUTO") == 0) {
IE_THROW()
<< "AUTO device does not support extensions. Please, set extensions directly to fallback devices";
IE_THROW() << "AUTO device does not support extensions. Please, set extensions directly to fallback devices";
}

_impl->AddExtension(extension);
@ -1018,19 +1037,21 @@ void Core::AddExtension(const IExtensionPtr& extension) {
_impl->AddExtension(extension);
}

ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName, const std::string& deviceName,
ExecutableNetwork Core::ImportNetwork(const std::string& modelFileName,
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork");
auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config);
auto exec = _impl->GetCPPPluginByName(parsed._deviceName).ImportNetwork(modelFileName, parsed._config);
return { exec, exec };
return {exec, exec};
}

ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, const std::string& deviceName,
ExecutableNetwork Core::ImportNetwork(std::istream& networkModel,
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::ImportNetwork");
auto exec = _impl->ImportNetwork(networkModel, deviceName, config);
return { exec, exec };
return {exec, exec};
}

ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) {
@ -1047,12 +1068,12 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel) {
std::getline(networkModel, deviceName);
} else {
IE_THROW() << "Passed compiled stream does not contain device name. "
"Please, provide device name manually";
"Please, provide device name manually";
}
networkModel.seekg(currentPos, networkModel.beg);

auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {});
return { exec, exec };
return {exec, exec};
}

ExecutableNetwork Core::ImportNetwork(std::istream& networkModel,
@ -1070,10 +1091,11 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel,

auto parsed = core_detail::parseDeviceNameIntoConfig(deviceName, config);
auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, context, parsed._config);
return { exec, exec };
return {exec, exec};
}

QueryNetworkResult Core::QueryNetwork(const CNNNetwork& network, const std::string& deviceName,
QueryNetworkResult Core::QueryNetwork(const CNNNetwork& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) const {
return _impl->QueryNetwork(network, deviceName, config);
}
@ -1082,25 +1104,26 @@ void Core::SetConfig(const std::map<std::string, std::string>& config, const std
// HETERO case
if (deviceName.find("HETERO:") == 0) {
IE_THROW() << "SetConfig is supported only for HETERO itself (without devices). "
"You can configure the devices with SetConfig before creating the HETERO on top.";
"You can configure the devices with SetConfig before creating the HETERO on top.";
}

// MULTI case
if (deviceName.find("MULTI:") == 0) {
IE_THROW() << "SetConfig is supported only for MULTI itself (without devices). "
"You can configure the devices with SetConfig before creating the MULTI on top.";
"You can configure the devices with SetConfig before creating the MULTI on top.";
}

// AUTO case
if (deviceName.find("AUTO:") == 0) {
IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). "
"You can configure the devices with SetConfig before creating the AUTO on top.";
"You can configure the devices with SetConfig before creating the AUTO on top.";
}

// GPU.0, FPGA.1 cases
if (deviceName.find(".") != std::string::npos) {
IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). "
"You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only";
IE_THROW()
<< "SetConfig is supported only for device family itself (without particular device .#). "
"You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only";
}

if (deviceName.empty()) {
@ -1115,25 +1138,22 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name
// HETERO case
{
if (deviceName.find("HETERO:") == 0) {
IE_THROW()
<< "You can only GetConfig of the HETERO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the HETERO on top.";
IE_THROW() << "You can only GetConfig of the HETERO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the HETERO on top.";
}
}
// MULTI case
{
if (deviceName.find("MULTI:") == 0) {
IE_THROW()
<< "You can only GetConfig of the MULTI itself (without devices). "
"GetConfig is also possible for the individual devices before creating the MULTI on top.";
IE_THROW() << "You can only GetConfig of the MULTI itself (without devices). "
"GetConfig is also possible for the individual devices before creating the MULTI on top.";
}
}
// AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can only GetConfig of the AUTO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the AUTO on top.";
IE_THROW() << "You can only GetConfig of the AUTO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the AUTO on top.";
}
}

@ -1142,7 +1162,8 @@ Parameter Core::GetConfig(const std::string& deviceName, const std::string& name
// we need to return a copy of Parameter object which is created on Core side,
// not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
// TODO: remove this WA after *-31417 is resolved
return core_detail::copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config));
return core_detail::copyParameterValue(
_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config));
}

Parameter Core::GetMetric(const std::string& deviceName, const std::string& name) const {
@ -1173,7 +1194,7 @@ void Core::UnregisterPlugin(const std::string& deviceName_) {
namespace ov {
namespace runtime {

class Core::Impl: public core_detail::CoreImpl {};
class Core::Impl : public core_detail::CoreImpl {};

Core::Core(const std::string& xmlConfigFile) {
_impl = std::make_shared<Impl>();
@ -1187,31 +1208,40 @@ std::map<std::string, InferenceEngine::Version> Core::get_versions(const std::st

#ifdef ENABLE_UNICODE_PATH_SUPPORT
std::shared_ptr<ngraph::Function> Core::read_model(const std::wstring& modelPath, const std::wstring& binPath) const {
return _impl->ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath),
FileUtils::wStringtoMBCSstringChar(binPath)).getFunction();
return _impl
->ReadNetwork(FileUtils::wStringtoMBCSstringChar(modelPath), FileUtils::wStringtoMBCSstringChar(binPath))
.getFunction();
}
#endif
std::shared_ptr<ngraph::Function> Core::read_model(const std::string& modelPath, const std::string& binPath) const {
return _impl->ReadNetwork(modelPath, binPath).getFunction();
}
std::shared_ptr<ngraph::Function> Core::read_model(const std::string& model, const InferenceEngine::Blob::CPtr& weights) const {
std::shared_ptr<ngraph::Function> Core::read_model(const std::string& model,
const InferenceEngine::Blob::CPtr& weights) const {
return _impl->ReadNetwork(model, weights).getFunction();
}
InferenceEngine::ExecutableNetwork Core::compile_model(const std::shared_ptr<const ngraph::Function>& network,
const std::string& deviceName, const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast<ngraph::Function>(network)), deviceName, config);
return { exec, exec };
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast<ngraph::Function>(network)),
deviceName,
config);
return {exec, exec};
}
InferenceEngine::ExecutableNetwork Core::compile_model(const std::string& modelPath,
const std::string& deviceName, const std::map<std::string, std::string>& config) {
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(modelPath, deviceName, config);
return { exec, exec };
return {exec, exec};
}

InferenceEngine::ExecutableNetwork Core::compile_model(const std::shared_ptr<const ngraph::Function>& network,
const InferenceEngine::RemoteContext::Ptr& context, const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast<ngraph::Function>(network)), context, config);
return { exec, exec };
const InferenceEngine::RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
auto exec = _impl->LoadNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast<ngraph::Function>(network)),
context,
config);
return {exec, exec};
}

void Core::add_extension(const InferenceEngine::IExtensionPtr& extension) {
@ -1219,13 +1249,15 @@ void Core::add_extension(const InferenceEngine::IExtensionPtr& extension) {
}

InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel,
const std::string& deviceName, const std::map<std::string, std::string>& config) {
const std::string& deviceName,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model");
auto exec = _impl->ImportNetwork(networkModel, deviceName, config);
return { exec, exec };
return {exec, exec};
}

InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel, const InferenceEngine::RemoteContext::Ptr& context,
InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel,
const InferenceEngine::RemoteContext::Ptr& context,
const std::map<std::string, std::string>& config) {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Core::import_model");

@ -1240,42 +1272,45 @@ InferenceEngine::ExecutableNetwork Core::import_model(std::istream& networkModel
std::getline(networkModel, deviceName);
} else {
IE_THROW() << "Passed compiled stream does not contain device name. "
"Please, provide device name manually";
"Please, provide device name manually";
}
networkModel.seekg(currentPos, networkModel.beg);

auto exec = _impl->GetCPPPluginByName(deviceName).ImportNetwork(networkModel, {});
return { exec, exec };
return {exec, exec};
}

InferenceEngine::QueryNetworkResult Core::query_model(const std::shared_ptr<const ngraph::Function>& network,
const std::string& deviceName,
const std::map<std::string, std::string>& config) const {
return _impl->QueryNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast<ngraph::Function>(network)), deviceName, config);
return _impl->QueryNetwork(InferenceEngine::CNNNetwork(std::const_pointer_cast<ngraph::Function>(network)),
deviceName,
config);
}
void Core::set_config(const std::map<std::string, std::string>& config, const std::string& deviceName) {
// HETERO case
if (deviceName.find("HETERO:") == 0) {
IE_THROW() << "SetConfig is supported only for HETERO itself (without devices). "
"You can configure the devices with SetConfig before creating the HETERO on top.";
"You can configure the devices with SetConfig before creating the HETERO on top.";
}

// MULTI case
if (deviceName.find("MULTI:") == 0) {
IE_THROW() << "SetConfig is supported only for MULTI itself (without devices). "
"You can configure the devices with SetConfig before creating the MULTI on top.";
"You can configure the devices with SetConfig before creating the MULTI on top.";
}

// AUTO case
if (deviceName.find("AUTO:") == 0) {
IE_THROW() << "SetConfig is supported only for AUTO itself (without devices). "
"You can configure the devices with SetConfig before creating the AUTO on top.";
"You can configure the devices with SetConfig before creating the AUTO on top.";
}

// GPU.0, FPGA.1 cases
if (deviceName.find(".") != std::string::npos) {
IE_THROW() << "SetConfig is supported only for device family itself (without particular device .#). "
"You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only";
IE_THROW()
<< "SetConfig is supported only for device family itself (without particular device .#). "
"You can pass .# as a particular device instance to QueryNetwork, LoadNetwork, ImportNetwork only";
}

if (deviceName.empty()) {
@ -1290,25 +1325,22 @@ InferenceEngine::Parameter Core::get_config(const std::string& deviceName, const
// HETERO case
{
if (deviceName.find("HETERO:") == 0) {
IE_THROW()
<< "You can only GetConfig of the HETERO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the HETERO on top.";
IE_THROW() << "You can only GetConfig of the HETERO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the HETERO on top.";
}
}
// MULTI case
{
if (deviceName.find("MULTI:") == 0) {
IE_THROW()
<< "You can only GetConfig of the MULTI itself (without devices). "
"GetConfig is also possible for the individual devices before creating the MULTI on top.";
IE_THROW() << "You can only GetConfig of the MULTI itself (without devices). "
"GetConfig is also possible for the individual devices before creating the MULTI on top.";
}
}
// AUTO case
{
if (deviceName.find("AUTO:") == 0) {
IE_THROW()
<< "You can only GetConfig of the AUTO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the AUTO on top.";
IE_THROW() << "You can only GetConfig of the AUTO itself (without devices). "
"GetConfig is also possible for the individual devices before creating the AUTO on top.";
}
}

@ -1317,7 +1349,8 @@ InferenceEngine::Parameter Core::get_config(const std::string& deviceName, const
// we need to return a copy of Parameter object which is created on Core side,
// not in InferenceEngine plugin side, which can be unloaded from Core in a parallel thread
// TODO: remove this WA after *-31417 is resolved
return core_detail::copyParameterValue(_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config));
return core_detail::copyParameterValue(
_impl->GetCPPPluginByName(parsed._deviceName).GetConfig(name, parsed._config));
}

InferenceEngine::Parameter Core::get_metric(const std::string& deviceName, const std::string& name) const {
@ -1343,7 +1376,8 @@ void Core::register_plugins(const std::string& xmlConfigFile) {
_impl->RegisterPluginsInRegistry(xmlConfigFile);
}

InferenceEngine::RemoteContext::Ptr Core::create_context(const std::string& deviceName, const InferenceEngine::ParamMap& params) {
InferenceEngine::RemoteContext::Ptr Core::create_context(const std::string& deviceName,
const InferenceEngine::ParamMap& params) {
if (deviceName.find("HETERO") == 0) {
IE_THROW() << "HETERO device does not support remote context";
}
@ -1374,5 +1408,5 @@ InferenceEngine::RemoteContext::Ptr Core::get_default_context(const std::string&
return _impl->GetCPPPluginByName(parsed._deviceName).GetDefaultContext(parsed._config);
}

} // namespace runtime
} // namespace ov
} // namespace runtime
} // namespace ov

@ -71,11 +71,13 @@ public:
};

Data::Data(const std::string& name, Precision _precision, Layout layout)
: name(name), userObject({0}), tensorDesc(_precision, layout) {
: name(name),
userObject({0}),
tensorDesc(_precision, layout) {
_impl = std::make_shared<Impl>();
}

Data::Data(const std::string& name, const TensorDesc& desc): name(name), userObject({0}), tensorDesc(desc) {
Data::Data(const std::string& name, const TensorDesc& desc) : name(name), userObject({0}), tensorDesc(desc) {
_impl = std::make_shared<Impl>();
}

@ -103,14 +105,13 @@ void Data::reshape(const SizeVector& a_dims, Layout a_layout) {
tensorDesc.reshape(a_dims, a_layout);
}

Data::Data(const Data& data) :
name(data.name), userObject(data.userObject), tensorDesc(data.tensorDesc) {
Data::Data(const Data& data) : name(data.name), userObject(data.userObject), tensorDesc(data.tensorDesc) {
_impl = std::make_shared<Impl>();
_impl->creatorLayer = data._impl->creatorLayer;
_impl->inputTo = data._impl->inputTo;
}

Data & Data::operator = (const Data& data) {
Data& Data::operator=(const Data& data) {
if (this != &data) {
name = data.name;
userObject = data.userObject;
@ -151,15 +152,15 @@ const SizeVector& Data::getDims() const {

namespace InferenceEngine {

INFERENCE_ENGINE_API_CPP(CNNLayerWeakPtr&) getCreatorLayer(const DataPtr & data) {
INFERENCE_ENGINE_API_CPP(CNNLayerWeakPtr&) getCreatorLayer(const DataPtr& data) {
return data->_impl->creatorLayer;
}

INFERENCE_ENGINE_API_CPP(std::map<std::string, CNNLayerPtr>&) getInputTo(const DataPtr & data) {
INFERENCE_ENGINE_API_CPP(std::map<std::string, CNNLayerPtr>&) getInputTo(const DataPtr& data) {
return data->_impl->inputTo;
}

INFERENCE_ENGINE_API_CPP(std::map<std::string, CNNLayerPtr>&) getInputTo(Data * data) {
INFERENCE_ENGINE_API_CPP(std::map<std::string, CNNLayerPtr>&) getInputTo(Data* data) {
return data->_impl->inputTo;
}

@ -14,7 +14,7 @@
namespace InferenceEngine {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(IE_LT);
OV_ITT_DOMAIN(IE_LT);
} // namespace domains
} // namespace itt
} // namespace InferenceEngine
@ -22,8 +22,8 @@ namespace domains {
namespace ov {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(IE);
OV_ITT_DOMAIN(IE_RT);
OV_ITT_DOMAIN(IE);
OV_ITT_DOMAIN(IE_RT);
} // namespace domains
} // namespace itt
} // namespace ov

@ -10,17 +10,20 @@
using namespace InferenceEngine;

TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, Layout layout)
: precision(precision), blockingDesc(dims, layout) {
: precision(precision),
blockingDesc(dims, layout) {
this->dims = dims;
this->layout = layout;
}

TensorDesc::TensorDesc(const Precision& precision, Layout layout): precision(precision), blockingDesc() {
TensorDesc::TensorDesc(const Precision& precision, Layout layout) : precision(precision), blockingDesc() {
this->layout = layout;
}

TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, const BlockingDesc& blockDesc)
: dims(dims), precision(precision), blockingDesc(blockDesc) {
: dims(dims),
precision(precision),
blockingDesc(blockDesc) {
if (dims.size() == 0 || blockingDesc.getBlockDims().size() == 0) {
layout = Layout::SCALAR;
return;
@ -43,7 +46,8 @@ TensorDesc::TensorDesc(const Precision& precision, const SizeVector& dims, const
case 3:
if (blockingDesc.getOrder()[0] == 0 && blockingDesc.getOrder()[1] == 1 && blockingDesc.getOrder()[2] == 2) {
layout = Layout::CHW;
} else if (blockingDesc.getOrder()[0] == 1 && blockingDesc.getOrder()[1] == 2 && blockingDesc.getOrder()[2] == 0) {
} else if (blockingDesc.getOrder()[0] == 1 && blockingDesc.getOrder()[1] == 2 &&
blockingDesc.getOrder()[2] == 0) {
layout = Layout::HWC;
}
break;
@ -81,7 +85,8 @@ void TensorDesc::setDims(const SizeVector& dims) {
if (layout == Layout::BLOCKED) {
auto newDims = blockingDesc.getBlockDims();
auto newOrder = blockingDesc.getOrder();
if (newDims.empty()) newDims = dims;
if (newDims.empty())
newDims = dims;
if (newOrder.empty()) {
for (size_t i = 0; i < newDims.size(); i++) {
newOrder.push_back(i);
@ -93,7 +98,8 @@ void TensorDesc::setDims(const SizeVector& dims) {
IE_THROW() << "Cannot set dimensions for SCALAR layout!";
blockingDesc = BlockingDesc(dims, layout);
}
if (layout != Layout::SCALAR) this->dims = dims;
if (layout != Layout::SCALAR)
this->dims = dims;
}

void TensorDesc::setLayout(Layout l) {
@ -138,13 +144,12 @@ void TensorDesc::setLayout(Layout l) {
}

if (inconsistentLayout) {
IE_THROW() << "Size of dims(" << std::to_string(dims.size()) << ") and format(" << l
<< ") are inconsistent.";
IE_THROW() << "Size of dims(" << std::to_string(dims.size()) << ") and format(" << l << ") are inconsistent.";
}

// HACK: we need to update BlockingDesc after layout change, but if it was set manually not sure how to di this properly
const bool hasDefaultBlockingDesc =
blockingDesc == BlockingDesc(dims, layout);
// HACK: we need to update BlockingDesc after layout change, but if it was set manually not sure how to di this
// properly
const bool hasDefaultBlockingDesc = blockingDesc == BlockingDesc(dims, layout);

layout = l;

@ -185,9 +190,11 @@ Layout TensorDesc::getLayoutByDims(const SizeVector& dims) {
}

size_t TensorDesc::offset(const SizeVector& v) const {
if (layout == Layout::ANY) IE_THROW() << "Cannot calculate offset for any format!";
if (layout == Layout::ANY)
IE_THROW() << "Cannot calculate offset for any format!";

if (layout == Layout::SCALAR) return blockingDesc.getOffsetPadding();
if (layout == Layout::SCALAR)
return blockingDesc.getOffsetPadding();

SizeVector off_v = v;
const SizeVector& blockedDims = blockingDesc.getBlockDims();
@ -225,7 +232,8 @@ size_t TensorDesc::offset(size_t l) const {

void TensorDesc::reshape(const SizeVector& dims, Layout layout) {
for (auto& padd : blockingDesc.getOffsetPaddingToData()) {
if (padd) IE_THROW() << "Cannot reshape a non-packaged blob!";
if (padd)
IE_THROW() << "Cannot reshape a non-packaged blob!";
}
if (layout != Layout::ANY) {
blockingDesc = BlockingDesc(dims, layout);
@ -242,20 +250,23 @@ void TensorDesc::reshape(const SizeVector& dims, const BlockingDesc& blockDesc)
this->layout = Layout::BLOCKED;
}

BlockingDesc::BlockingDesc(const SizeVector& block_dims, const SizeVector& order): offsetPadding(0) {
BlockingDesc::BlockingDesc(const SizeVector& block_dims, const SizeVector& order) : offsetPadding(0) {
this->order = order;
if (block_dims.empty() || order.empty()) return;
if (block_dims.empty() || order.empty())
return;
fillDesc(block_dims, order);
}

BlockingDesc::BlockingDesc(): BlockingDesc({}, Layout::ANY) {}
BlockingDesc::BlockingDesc() : BlockingDesc({}, Layout::ANY) {}

BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset)
: BlockingDesc(blocked_dims, order) {
this->offsetPadding = offset;
}

BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset,
BlockingDesc::BlockingDesc(const SizeVector& blocked_dims,
const SizeVector& order,
size_t offset,
const SizeVector& dimOffsets)
: BlockingDesc(blocked_dims, order) {
this->offsetPadding = offset;
@ -264,23 +275,29 @@ BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& ord
this->offsetPaddingToData = dimOffsets;
}

BlockingDesc::BlockingDesc(const SizeVector& blocked_dims, const SizeVector& order, size_t offset,
const SizeVector& dimOffsets, const SizeVector& strides)
BlockingDesc::BlockingDesc(const SizeVector& blocked_dims,
const SizeVector& order,
size_t offset,
const SizeVector& dimOffsets,
const SizeVector& strides)
: BlockingDesc(blocked_dims, order) {
this->offsetPadding = offset;
if (blocked_dims.size() != strides.size()) IE_THROW() << "Strides are not initialized for all dimensions.";
if (blocked_dims.size() != strides.size())
IE_THROW() << "Strides are not initialized for all dimensions.";
this->strides = strides;
if (blocked_dims.size() != dimOffsets.size())
IE_THROW() << "Offsets are not initialized for all dimensions.";
this->offsetPaddingToData = dimOffsets;
}

BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding(0) {
if (dims.empty()) return;
BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout) : offsetPadding(0) {
if (dims.empty())
return;

offsetPadding = 0;
auto checkDims = [](size_t r_size, size_t e_size) {
if (r_size != e_size) IE_THROW() << "Dims and format are inconsistent.";
if (r_size != e_size)
IE_THROW() << "Dims and format are inconsistent.";
};
SizeVector l_order;
SizeVector l_dims;
@ -344,7 +361,8 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding
break;
case Layout::BLOCKED:
l_order.clear();
for (size_t i = 0; i < dims.size(); i++) l_order.push_back(i);
for (size_t i = 0; i < dims.size(); i++)
l_order.push_back(i);
l_dims = dims;
break;
}
@ -389,22 +407,16 @@ struct DimSlice {

DimSlice() = default;

DimSlice(size_t startInd, size_t size) :
startInd(startInd), size(size) {
}
DimSlice(size_t startInd, size_t size) : startInd(startInd), size(size) {}
};

using TensorSlice = std::vector<DimSlice>;

void checkROI(
const TensorDesc& origDesc,
const TensorSlice& roi) {
void checkROI(const TensorDesc& origDesc, const TensorSlice& roi) {
const auto numDims = origDesc.getDims().size();

if (roi.size() != numDims) {
IE_THROW()
<< "ROI num dims " << roi.size() <<
" differs from original num dims " << numDims;
IE_THROW() << "ROI num dims " << roi.size() << " differs from original num dims " << numDims;
}

// TensorDesc stores dimensions in standard layout, as well as roi vector
@ -415,18 +427,13 @@ void checkROI(
const auto endInd = roiSlice.startInd + roiSlice.size;

if (endInd > fullSize) {
IE_THROW()
<< "ROI [" << roiSlice.startInd << ", " << endInd << ")"
<< " is out of range " << fullSize
<< " for dimension " << dimInd;
IE_THROW() << "ROI [" << roiSlice.startInd << ", " << endInd << ")"
<< " is out of range " << fullSize << " for dimension " << dimInd;
}
}
}

TensorDesc make_roi_desc(
const TensorDesc& origDesc,
const TensorSlice& roi,
bool useOrigMemDesc) {
TensorDesc make_roi_desc(const TensorDesc& origDesc, const TensorSlice& roi, bool useOrigMemDesc) {
const auto numDims = origDesc.getDims().size();

checkROI(origDesc, roi);
@ -447,7 +454,8 @@ TensorDesc make_roi_desc(
IE_ASSERT(roiBlkDimOffsets.size() == numDims);

// BlockingDesc stores dimensions in memory order, so we need to use origOrder array.
// Offsets in `roi` relates to `origDesc` dimensions, while offsets in `BlockingDesc` relates to top parent tensor dimensions.
// Offsets in `roi` relates to `origDesc` dimensions, while offsets in `BlockingDesc` relates to top parent tensor
// dimensions.
for (size_t memInd = 0; memInd < numDims; ++memInd) {
const auto dimInd = origBlkOrder[memInd];
const auto& roiSlice = roi[dimInd];
@ -458,39 +466,32 @@ TensorDesc make_roi_desc(
roiBlkOffset += roiSlice.startInd * origBlkStrides[memInd];
}

const auto roiBlkDesc =
useOrigMemDesc ?
BlockingDesc(roiBlkDims, origBlkOrder, roiBlkOffset, roiBlkDimOffsets, origBlkStrides) :
BlockingDesc(roiBlkDims, origBlkOrder);
const auto roiBlkDesc = useOrigMemDesc
? BlockingDesc(roiBlkDims, origBlkOrder, roiBlkOffset, roiBlkDimOffsets, origBlkStrides)
: BlockingDesc(roiBlkDims, origBlkOrder);

const auto roiDesc = TensorDesc(origPrecision, roiDims, roiBlkDesc);

return roiDesc;
}

TensorSlice make_roi_slice(
const TensorDesc& origDesc,
const ROI& roi) {
TensorSlice make_roi_slice(const TensorDesc& origDesc, const ROI& roi) {
const auto layout = origDesc.getLayout();
if (layout != Layout::NCHW && layout != Layout::NHWC) {
IE_THROW()
<< "Unsupported layout " << layout;
IE_THROW() << "Unsupported layout " << layout;
}

TensorSlice roiSlice(4);
roiSlice[0] = DimSlice {roi.id, 1}; // N
roiSlice[1] = DimSlice {0, origDesc.getDims()[1]}; // C
roiSlice[2] = DimSlice {roi.posY, roi.sizeY}; // H
roiSlice[3] = DimSlice {roi.posX, roi.sizeX}; // W
roiSlice[0] = DimSlice{roi.id, 1}; // N
roiSlice[1] = DimSlice{0, origDesc.getDims()[1]}; // C
roiSlice[2] = DimSlice{roi.posY, roi.sizeY}; // H
roiSlice[3] = DimSlice{roi.posX, roi.sizeX}; // W

return roiSlice;
}

} // namespace

TensorDesc InferenceEngine::make_roi_desc(
const TensorDesc& origDesc,
const ROI& roi,
bool useOrigMemDesc) {
TensorDesc InferenceEngine::make_roi_desc(const TensorDesc& origDesc, const ROI& roi, bool useOrigMemDesc) {
return make_roi_desc(origDesc, make_roi_slice(origDesc, roi), useOrigMemDesc);
}

@ -16,6 +16,7 @@ int ie_memcpy(void* dest, size_t destsz, void const* src, size_t count) {
return -1;
}

for (i = 0; i < count; ++i) (reinterpret_cast<uint8_t*>(dest))[i] = (reinterpret_cast<const uint8_t*>(src))[i];
for (i = 0; i < count; ++i)
(reinterpret_cast<uint8_t*>(dest))[i] = (reinterpret_cast<const uint8_t*>(src))[i];
return 0;
}

@ -3,18 +3,18 @@
//

#include "ie_network_reader.hpp"
#include "ie_itt.hpp"

#include <details/ie_so_pointer.hpp>
#include <file_utils.h>
#include <ie_reader.hpp>
#include <ie_ir_version.hpp>
#include <frontend_manager/frontend_manager.hpp>

#include <fstream>
#include <istream>
#include <mutex>
#include <map>
#include <mutex>

#include "details/ie_so_pointer.hpp"
#include "file_utils.h"
#include "frontend_manager/frontend_manager.hpp"
#include "ie_ir_version.hpp"
#include "ie_itt.hpp"
#include "ie_reader.hpp"

namespace InferenceEngine {

@ -37,16 +37,17 @@ public:
/**
* @brief This class is a wrapper for reader interfaces
*/
class Reader: public IReader {
class Reader : public IReader {
InferenceEngine::details::SOPointer<IReader> ptr;
std::once_flag readFlag;
std::string name;
std::string location;

InferenceEngine::details::SOPointer<IReader> getReaderPtr() {
std::call_once(readFlag, [&] () {
std::call_once(readFlag, [&]() {
FileUtils::FilePath libraryName = FileUtils::toFilePath(location);
FileUtils::FilePath readersLibraryPath = FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName);
FileUtils::FilePath readersLibraryPath =
FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName);

if (!FileUtils::fileExist(readersLibraryPath)) {
IE_THROW() << "Please, make sure that Inference Engine ONNX reader library "
@ -65,7 +66,7 @@ class Reader: public IReader {

public:
using Ptr = std::shared_ptr<Reader>;
Reader(const std::string& name, const std::string location): name(name), location(location) {}
Reader(const std::string& name, const std::string location) : name(name), location(location) {}
bool supportModel(std::istream& model) const override {
OV_ITT_SCOPED_TASK(ov::itt::domains::IE, "Reader::supportModel");
auto reader = getReaderPtr();
@ -75,7 +76,9 @@ public:
auto reader = getReaderPtr();
return reader->read(model, exts);
}
CNNNetwork read(std::istream& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) const override {
CNNNetwork read(std::istream& model,
const Blob::CPtr& weights,
const std::vector<IExtensionPtr>& exts) const override {
auto reader = getReaderPtr();
return reader->read(model, weights, exts);
}
@ -98,12 +101,14 @@ void registerReaders() {
|
||||
static bool initialized = false;
|
||||
static std::mutex readerMutex;
|
||||
std::lock_guard<std::mutex> lock(readerMutex);
|
||||
if (initialized) return;
|
||||
if (initialized)
|
||||
return;
|
||||
|
||||
// TODO: Read readers info from XML
|
||||
auto create_if_exists = [] (const std::string name, const std::string library_name) {
|
||||
auto create_if_exists = [](const std::string name, const std::string library_name) {
|
||||
FileUtils::FilePath libraryName = FileUtils::toFilePath(library_name);
|
||||
FileUtils::FilePath readersLibraryPath = FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName);
|
||||
FileUtils::FilePath readersLibraryPath =
|
||||
FileUtils::makePluginLibraryName(getInferenceEngineLibraryPath(), libraryName);
|
||||
|
||||
if (!FileUtils::fileExist(readersLibraryPath))
|
||||
return std::shared_ptr<Reader>();
|
||||
@ -111,47 +116,53 @@ void registerReaders() {
|
||||
};
|
||||
|
||||
// try to load ONNX reader if library exists
|
||||
auto onnxReader = create_if_exists("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX));
|
||||
auto onnxReader =
|
||||
create_if_exists("ONNX", std::string("inference_engine_onnx_reader") + std::string(IE_BUILD_POSTFIX));
|
||||
if (onnxReader) {
|
||||
readers.emplace("onnx", onnxReader);
|
||||
readers.emplace("prototxt", onnxReader);
|
||||
}
|
||||
|
||||
// try to load IR reader v10 if library exists
|
||||
auto irReaderv10 = create_if_exists("IRv10", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX));
|
||||
auto irReaderv10 =
|
||||
create_if_exists("IRv10", std::string("inference_engine_ir_reader") + std::string(IE_BUILD_POSTFIX));
|
||||
if (irReaderv10)
|
||||
readers.emplace("xml", irReaderv10);
|
||||
|
||||
// try to load IR reader v7 if library exists
|
||||
auto irReaderv7 = create_if_exists("IRv7", std::string("inference_engine_ir_v7_reader") + std::string(IE_BUILD_POSTFIX));
|
||||
auto irReaderv7 =
|
||||
create_if_exists("IRv7", std::string("inference_engine_ir_v7_reader") + std::string(IE_BUILD_POSTFIX));
|
||||
if (irReaderv7)
|
||||
readers.emplace("xml", irReaderv7);
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
void assertIfIRv7LikeModel(std::istream & modelStream) {
|
||||
void assertIfIRv7LikeModel(std::istream& modelStream) {
|
||||
auto irVersion = details::GetIRVersion(modelStream);
|
||||
bool isIRv7 = irVersion > 1 && irVersion <= 7;
|
||||
|
||||
if (!isIRv7)
|
||||
return;
|
||||
|
||||
for (auto && kvp : readers) {
|
||||
for (auto&& kvp : readers) {
|
||||
Reader::Ptr reader = kvp.second;
|
||||
if (reader->getName() == "IRv7") {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
IE_THROW() << "The support of IR v" << irVersion << " has been removed from the product. "
|
||||
"Please, convert the original model using the Model Optimizer which comes with this "
|
||||
"version of the OpenVINO to generate supported IR version.";
|
||||
IE_THROW() << "The support of IR v" << irVersion
|
||||
<< " has been removed from the product. "
|
||||
"Please, convert the original model using the Model Optimizer which comes with this "
|
||||
"version of the OpenVINO to generate supported IR version.";
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector<IExtensionPtr>& exts) {
|
||||
CNNNetwork details::ReadNetwork(const std::string& modelPath,
|
||||
const std::string& binPath,
|
||||
const std::vector<IExtensionPtr>& exts) {
|
||||
// Register readers if it is needed
|
||||
registerReaders();
|
||||
|
||||
@ -183,7 +194,8 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
|
||||
if (bPath.empty()) {
|
||||
auto pathWoExt = modelPath;
|
||||
auto pos = modelPath.rfind('.');
|
||||
if (pos != std::string::npos) pathWoExt = modelPath.substr(0, pos);
|
||||
if (pos != std::string::npos)
|
||||
pathWoExt = modelPath.substr(0, pos);
|
||||
for (const auto& ext : reader->getDataFileExtensions()) {
|
||||
bPath = pathWoExt + "." + ext;
|
||||
if (!FileUtils::fileExist(bPath)) {
|
||||
@ -209,7 +221,7 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
|
||||
size_t fileSize = binStream.tellg();
|
||||
binStream.seekg(0, std::ios::beg);
|
||||
|
||||
Blob::Ptr weights = make_shared_blob<uint8_t>({Precision::U8, { fileSize }, C });
|
||||
Blob::Ptr weights = make_shared_blob<uint8_t>({Precision::U8, {fileSize}, C});
|
||||
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::IE_RT, "ReadNetworkWeights");
|
||||
@ -238,20 +250,24 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string&
|
||||
std::string weights_path = binPath;
|
||||
#endif
|
||||
FE = manager.load_by_model(model_path, weights_path);
|
||||
if (FE) inputModel = FE->load(model_path, weights_path);
|
||||
if (FE)
|
||||
inputModel = FE->load(model_path, weights_path);
|
||||
} else {
|
||||
FE = manager.load_by_model(model_path);
|
||||
if (FE) inputModel = FE->load(model_path);
|
||||
if (FE)
|
||||
inputModel = FE->load(model_path);
|
||||
}
|
||||
if (inputModel) {
|
||||
auto ngFunc = FE->convert(inputModel);
|
||||
return CNNNetwork(ngFunc);
|
||||
}
|
||||
IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt << " and read the model: " << modelPath <<
|
||||
". Please check that reader library exists in your PATH.";
|
||||
IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt
|
||||
<< " and read the model: " << modelPath << ". Please check that reader library exists in your PATH.";
|
||||
}
|
||||
|
||||
CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weights, const std::vector<IExtensionPtr>& exts) {
|
||||
CNNNetwork details::ReadNetwork(const std::string& model,
|
||||
const Blob::CPtr& weights,
|
||||
const std::vector<IExtensionPtr>& exts) {
|
||||
// Register readers if it is needed
|
||||
registerReaders();
|
||||
std::istringstream modelStream(model);
|
||||
@ -266,7 +282,8 @@ CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weig
|
||||
return reader->read(modelStream, exts);
|
||||
}
|
||||
}
|
||||
IE_THROW() << "Unknown model format! Cannot find reader for the model and read it. Please check that reader library exists in your PATH.";
|
||||
IE_THROW() << "Unknown model format! Cannot find reader for the model and read it. Please check that reader "
|
||||
"library exists in your PATH.";
|
||||
}
|
||||
|
||||
} // namespace InferenceEngine
|
||||
|
@ -4,11 +4,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ie_iextension.h>
|
||||
#include <ie_blob.h>
|
||||
#include <string>
|
||||
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
#include "ie_blob.h"
|
||||
#include "ie_iextension.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
|
||||
@ -20,7 +21,9 @@ namespace details {
|
||||
* @param exts vector with extensions
|
||||
* @return CNNNetwork
|
||||
*/
|
||||
CNNNetwork ReadNetwork(const std::string& modelPath, const std::string& binPath, const std::vector<IExtensionPtr>& exts);
|
||||
CNNNetwork ReadNetwork(const std::string& modelPath,
|
||||
const std::string& binPath,
|
||||
const std::vector<IExtensionPtr>& exts);
|
||||
/**
|
||||
* @brief Reads IR xml and bin (with the same name) files
|
||||
* @param model string with IR
|
||||
|
@ -2,7 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
|
||||
#include "cnn_network_ngraph_impl.hpp"
|
||||
#include "ie_itt.hpp"
|
||||
|
||||
|
@ -2,16 +2,17 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ie_system_conf.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
#include "ie_system_conf.h"
|
||||
|
||||
# define XBYAK_NO_OP_NAMES
|
||||
# define XBYAK_UNDEF_JNL
|
||||
# include <xbyak/xbyak_util.h>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#define XBYAK_UNDEF_JNL
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@ -37,9 +38,7 @@ bool with_cpu_x86_avx512f() {
|
||||
}
|
||||
|
||||
bool with_cpu_x86_avx512_core() {
|
||||
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F |
|
||||
Xbyak::util::Cpu::tAVX512DQ |
|
||||
Xbyak::util::Cpu::tAVX512BW);
|
||||
return get_cpu_info().has(Xbyak::util::Cpu::tAVX512F | Xbyak::util::Cpu::tAVX512DQ | Xbyak::util::Cpu::tAVX512BW);
|
||||
}
|
||||
|
||||
bool with_cpu_x86_bfloat16() {
|
||||
@ -47,38 +46,36 @@ bool with_cpu_x86_bfloat16() {
|
||||
}
|
||||
|
||||
bool checkOpenMpEnvVars(bool includeOMPNumThreads) {
|
||||
for (auto&& var : {
|
||||
"GOMP_CPU_AFFINITY",
|
||||
"GOMP_DEBUG"
|
||||
"GOMP_RTEMS_THREAD_POOLS",
|
||||
"GOMP_SPINCOUNT"
|
||||
"GOMP_STACKSIZE"
|
||||
"KMP_AFFINITY"
|
||||
"KMP_NUM_THREADS"
|
||||
"MIC_KMP_AFFINITY",
|
||||
"MIC_OMP_NUM_THREADS"
|
||||
"MIC_OMP_PROC_BIND"
|
||||
"MKL_DOMAIN_NUM_THREADS"
|
||||
"MKL_DYNAMIC"
|
||||
"MKL_NUM_THREADS",
|
||||
"OMP_CANCELLATION"
|
||||
"OMP_DEFAULT_DEVICE"
|
||||
"OMP_DISPLAY_ENV"
|
||||
"OMP_DYNAMIC",
|
||||
"OMP_MAX_ACTIVE_LEVELS"
|
||||
"OMP_MAX_TASK_PRIORITY"
|
||||
"OMP_NESTED",
|
||||
"OMP_NUM_THREADS"
|
||||
"OMP_PLACES"
|
||||
"OMP_PROC_BIND"
|
||||
"OMP_SCHEDULE"
|
||||
"OMP_STACKSIZE",
|
||||
"OMP_THREAD_LIMIT"
|
||||
"OMP_WAIT_POLICY"
|
||||
"PHI_KMP_AFFINITY",
|
||||
"PHI_KMP_PLACE_THREADS"
|
||||
"PHI_OMP_NUM_THREADS"
|
||||
}) {
|
||||
for (auto&& var : {"GOMP_CPU_AFFINITY",
|
||||
"GOMP_DEBUG"
|
||||
"GOMP_RTEMS_THREAD_POOLS",
|
||||
"GOMP_SPINCOUNT"
|
||||
"GOMP_STACKSIZE"
|
||||
"KMP_AFFINITY"
|
||||
"KMP_NUM_THREADS"
|
||||
"MIC_KMP_AFFINITY",
|
||||
"MIC_OMP_NUM_THREADS"
|
||||
"MIC_OMP_PROC_BIND"
|
||||
"MKL_DOMAIN_NUM_THREADS"
|
||||
"MKL_DYNAMIC"
|
||||
"MKL_NUM_THREADS",
|
||||
"OMP_CANCELLATION"
|
||||
"OMP_DEFAULT_DEVICE"
|
||||
"OMP_DISPLAY_ENV"
|
||||
"OMP_DYNAMIC",
|
||||
"OMP_MAX_ACTIVE_LEVELS"
|
||||
"OMP_MAX_TASK_PRIORITY"
|
||||
"OMP_NESTED",
|
||||
"OMP_NUM_THREADS"
|
||||
"OMP_PLACES"
|
||||
"OMP_PROC_BIND"
|
||||
"OMP_SCHEDULE"
|
||||
"OMP_STACKSIZE",
|
||||
"OMP_THREAD_LIMIT"
|
||||
"OMP_WAIT_POLICY"
|
||||
"PHI_KMP_AFFINITY",
|
||||
"PHI_KMP_PLACE_THREADS"
|
||||
"PHI_OMP_NUM_THREADS"}) {
|
||||
if (getenv(var)) {
|
||||
if (0 != strcmp(var, "OMP_NUM_THREADS") || includeOMPNumThreads)
|
||||
return true;
|
||||
@ -90,10 +87,14 @@ bool checkOpenMpEnvVars(bool includeOMPNumThreads) {
|
||||
#if defined(__APPLE__)
|
||||
// for Linux and Windows the getNumberOfCPUCores (that accounts only for physical cores) implementation is OS-specific
|
||||
// (see cpp files in corresponding folders), for __APPLE__ it is default :
|
||||
int getNumberOfCPUCores(bool) { return parallel_get_max_threads();}
|
||||
#if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
std::vector<int> getAvailableNUMANodes() { return {-1}; }
|
||||
#endif
|
||||
int getNumberOfCPUCores(bool) {
|
||||
return parallel_get_max_threads();
|
||||
}
|
||||
# if !((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
std::vector<int> getAvailableNUMANodes() {
|
||||
return {-1};
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if ((IE_THREAD == IE_THREAD_TBB) || (IE_THREAD == IE_THREAD_TBB_AUTO))
|
||||
|
@ -3,12 +3,13 @@
|
||||
//
|
||||
|
||||
#include "ie_transformations.hpp"
|
||||
#include <ngraph/pass/low_latency.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
|
||||
#include "ngraph/pass/low_latency.hpp"
|
||||
#include "ngraph/pass/manager.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
|
||||
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
|
||||
void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork& network) {
|
||||
auto function = network.getFunction();
|
||||
ngraph::pass::Manager manager;
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
@ -17,8 +18,7 @@ void InferenceEngine::LowLatency(InferenceEngine::CNNNetwork &network) {
|
||||
manager.run_passes(function);
|
||||
}
|
||||
|
||||
void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork &network,
|
||||
bool use_const_initializer) {
|
||||
void InferenceEngine::lowLatency2(InferenceEngine::CNNNetwork& network, bool use_const_initializer) {
|
||||
auto function = network.getFunction();
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::LowLatency2>(use_const_initializer);
|
||||
|
@ -3,11 +3,11 @@
|
||||
//
|
||||
|
||||
#include <dlfcn.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "details/ie_so_loader.h"
|
||||
#include "file_utils.h"
|
||||
#include <iostream>
|
||||
|
||||
namespace InferenceEngine {
|
||||
namespace details {
|
||||
@ -25,8 +25,7 @@ public:
|
||||
}
|
||||
|
||||
#ifdef ENABLE_UNICODE_PATH_SUPPORT
|
||||
explicit Impl(const wchar_t* pluginName) : Impl(FileUtils::wStringtoMBCSstringChar(pluginName).c_str()) {
|
||||
}
|
||||
explicit Impl(const wchar_t* pluginName) : Impl(FileUtils::wStringtoMBCSstringChar(pluginName).c_str()) {}
|
||||
#endif // ENABLE_UNICODE_PATH_SUPPORT
|
||||
|
||||
~Impl() {
|
||||
@ -46,8 +45,7 @@ public:
|
||||
|
||||
procAddr = dlsym(shared_object, symbolName);
|
||||
if (procAddr == nullptr)
|
||||
IE_THROW(NotFound)
|
||||
<< "dlSym cannot locate method '" << symbolName << "': " << dlerror();
|
||||
IE_THROW(NotFound) << "dlSym cannot locate method '" << symbolName << "': " << dlerror();
|
||||
return procAddr;
|
||||
}
|
||||
};
|
||||
@ -58,7 +56,7 @@ SharedObjectLoader::SharedObjectLoader(const wchar_t* pluginName) {
|
||||
}
|
||||
#endif
|
||||
|
||||
SharedObjectLoader::SharedObjectLoader(const char * pluginName) {
|
||||
SharedObjectLoader::SharedObjectLoader(const char* pluginName) {
|
||||
_impl.reset(new Impl(pluginName));
|
||||
}
|
||||
|
||||
|
@ -2,35 +2,36 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <sched.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <sched.h>
|
||||
|
||||
#include "ie_common.h"
|
||||
#include "ie_system_conf.h"
|
||||
#include "threading/ie_parallel_custom_arena.hpp"
|
||||
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
struct CPU {
|
||||
int _processors = 0;
|
||||
int _sockets = 0;
|
||||
int _cores = 0;
|
||||
int _sockets = 0;
|
||||
int _cores = 0;
|
||||
|
||||
CPU() {
|
||||
std::ifstream cpuinfo("/proc/cpuinfo");
|
||||
std::vector<int> processors;
|
||||
std::map<int, int> sockets;
|
||||
std::vector<int> processors;
|
||||
std::map<int, int> sockets;
|
||||
int socketId = 0;
|
||||
while (!cpuinfo.eof()) {
|
||||
std::string line;
|
||||
std::getline(cpuinfo, line);
|
||||
if (line.empty()) continue;
|
||||
if (line.empty())
|
||||
continue;
|
||||
auto delimeter = line.find(':');
|
||||
auto key = line.substr(0, delimeter);
|
||||
auto value = line.substr(delimeter + 1);
|
||||
@ -83,14 +84,13 @@ int getNumberOfCPUCores(bool bigCoresOnly) {
|
||||
}
|
||||
}
|
||||
int phys_cores = CPU_COUNT(¤tCoreSet);
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
auto core_types = custom::info::core_types();
|
||||
if (bigCoresOnly && core_types.size() > 1) /*Hybrid CPU*/ {
|
||||
phys_cores = custom::info::default_concurrency(custom::task_arena::constraints{}
|
||||
.set_core_type(core_types.back())
|
||||
.set_max_threads_per_core(1));
|
||||
phys_cores = custom::info::default_concurrency(
|
||||
custom::task_arena::constraints{}.set_core_type(core_types.back()).set_max_threads_per_core(1));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
return phys_cores;
|
||||
}
|
||||
|
||||
|
@ -4,10 +4,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/ops.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
@ -25,8 +27,5 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "ngraph/ngraph.hpp"
|
||||
#include "ngraph/ops.hpp"
|
||||
|
@ -2,24 +2,25 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <thread>
|
||||
#include <openvino/itt.hpp>
|
||||
#include <queue>
|
||||
#include <atomic>
|
||||
#include <climits>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "threading/ie_thread_local.hpp"
|
||||
#include "ie_parallel_custom_arena.hpp"
|
||||
#include "ie_system_conf.h"
|
||||
#include "threading/ie_thread_affinity.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
#include <openvino/itt.hpp>
|
||||
#include "threading/ie_thread_local.hpp"
|
||||
|
||||
using namespace openvino;
|
||||
|
||||
@ -27,26 +28,28 @@ namespace InferenceEngine {
|
||||
struct CPUStreamsExecutor::Impl {
|
||||
struct Stream {
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
struct Observer: public custom::task_scheduler_observer {
|
||||
CpuSet _mask;
|
||||
int _ncpus = 0;
|
||||
int _threadBindingStep = 0;
|
||||
int _offset = 0;
|
||||
Observer(custom::task_arena& arena,
|
||||
CpuSet mask,
|
||||
int ncpus,
|
||||
const int streamId,
|
||||
const int threadsPerStream,
|
||||
const int threadBindingStep,
|
||||
const int threadBindingOffset) :
|
||||
custom::task_scheduler_observer(arena),
|
||||
_mask{std::move(mask)},
|
||||
_ncpus(ncpus),
|
||||
_threadBindingStep(threadBindingStep),
|
||||
_offset{streamId * threadsPerStream + threadBindingOffset} {
|
||||
}
|
||||
struct Observer : public custom::task_scheduler_observer {
|
||||
CpuSet _mask;
|
||||
int _ncpus = 0;
|
||||
int _threadBindingStep = 0;
|
||||
int _offset = 0;
|
||||
Observer(custom::task_arena& arena,
|
||||
CpuSet mask,
|
||||
int ncpus,
|
||||
const int streamId,
|
||||
const int threadsPerStream,
|
||||
const int threadBindingStep,
|
||||
const int threadBindingOffset)
|
||||
: custom::task_scheduler_observer(arena),
|
||||
_mask{std::move(mask)},
|
||||
_ncpus(ncpus),
|
||||
_threadBindingStep(threadBindingStep),
|
||||
_offset{streamId * threadsPerStream + threadBindingOffset} {}
|
||||
void on_scheduler_entry(bool) override {
|
||||
PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(), _threadBindingStep, _ncpus, _mask);
|
||||
PinThreadToVacantCore(_offset + tbb::this_task_arena::current_thread_index(),
|
||||
_threadBindingStep,
|
||||
_ncpus,
|
||||
_mask);
|
||||
}
|
||||
void on_scheduler_exit(bool) override {
|
||||
PinCurrentThreadByMask(_ncpus, _mask);
|
||||
@ -54,8 +57,7 @@ struct CPUStreamsExecutor::Impl {
|
||||
~Observer() override = default;
|
||||
};
|
||||
#endif
|
||||
explicit Stream(Impl* impl) :
|
||||
_impl(impl) {
|
||||
explicit Stream(Impl* impl) : _impl(impl) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock{_impl->_streamIdMutex};
|
||||
if (_impl->_streamIdQueue.empty()) {
|
||||
@ -66,40 +68,52 @@ struct CPUStreamsExecutor::Impl {
|
||||
}
|
||||
}
|
||||
_numaNodeId = _impl->_config._streams
|
||||
? _impl->_usedNumaNodes.at(
|
||||
(_streamId % _impl->_config._streams)/
|
||||
((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1)/_impl->_usedNumaNodes.size()))
|
||||
: _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size());
|
||||
? _impl->_usedNumaNodes.at((_streamId % _impl->_config._streams) /
|
||||
((_impl->_config._streams + _impl->_usedNumaNodes.size() - 1) /
|
||||
_impl->_usedNumaNodes.size()))
|
||||
: _impl->_usedNumaNodes.at(_streamId % _impl->_usedNumaNodes.size());
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
const auto concurrency = (0 == _impl->_config._threadsPerStream) ? custom::task_arena::automatic : _impl->_config._threadsPerStream;
|
||||
const auto concurrency = (0 == _impl->_config._threadsPerStream) ? custom::task_arena::automatic
|
||||
: _impl->_config._threadsPerStream;
|
||||
if (ThreadBindingType::HYBRID_AWARE == _impl->_config._threadBindingType) {
|
||||
if (Config::PreferredCoreType::ROUND_ROBIN != _impl->_config._threadPreferredCoreType) {
|
||||
if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) {
|
||||
_taskArena.reset(new custom::task_arena{concurrency});
|
||||
} else {
|
||||
const auto selected_core_type = Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType
|
||||
? custom::info::core_types().back() // running on Big cores only
|
||||
: custom::info::core_types().front(); // running on Little cores only
|
||||
_taskArena.reset(new custom::task_arena{
|
||||
custom::task_arena::constraints{}.set_core_type(selected_core_type).set_max_concurrency(concurrency)});
|
||||
}
|
||||
if (Config::PreferredCoreType::ANY == _impl->_config._threadPreferredCoreType) {
|
||||
_taskArena.reset(new custom::task_arena{concurrency});
|
||||
} else {
|
||||
const auto selected_core_type =
|
||||
Config::PreferredCoreType::BIG == _impl->_config._threadPreferredCoreType
|
||||
? custom::info::core_types().back() // running on Big cores only
|
||||
: custom::info::core_types().front(); // running on Little cores only
|
||||
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{}
|
||||
.set_core_type(selected_core_type)
|
||||
.set_max_concurrency(concurrency)});
|
||||
}
|
||||
} else {
|
||||
// assigning the stream to the core type in the round-robin fashion
|
||||
// wrapping around total_streams (i.e. how many streams all different core types can handle together)
|
||||
// wrapping around total_streams (i.e. how many streams all different core types can handle
|
||||
// together)
|
||||
const auto total_streams = _impl->total_streams_on_core_types.back().second;
|
||||
const auto streamId_wrapped = _streamId % total_streams;
|
||||
const auto& selected_core_type = std::find_if(_impl->total_streams_on_core_types.cbegin(), _impl->total_streams_on_core_types.cend(),
|
||||
[streamId_wrapped](const decltype(_impl->total_streams_on_core_types)::value_type & p) { return p.second > streamId_wrapped; })->first;
|
||||
_taskArena.reset(new custom::task_arena{
|
||||
custom::task_arena::constraints{}.set_core_type(selected_core_type).set_max_concurrency(concurrency)});
|
||||
const auto& selected_core_type =
|
||||
std::find_if(
|
||||
_impl->total_streams_on_core_types.cbegin(),
|
||||
_impl->total_streams_on_core_types.cend(),
|
||||
[streamId_wrapped](const decltype(_impl->total_streams_on_core_types)::value_type& p) {
|
||||
return p.second > streamId_wrapped;
|
||||
})
|
||||
->first;
|
||||
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{}
|
||||
.set_core_type(selected_core_type)
|
||||
.set_max_concurrency(concurrency)});
|
||||
}
|
||||
} else if (ThreadBindingType::NUMA == _impl->_config._threadBindingType) {
|
||||
_taskArena.reset(new custom::task_arena{custom::task_arena::constraints{_numaNodeId, concurrency}});
|
||||
} else if ((0 != _impl->_config._threadsPerStream) || (ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
|
||||
} else if ((0 != _impl->_config._threadsPerStream) ||
|
||||
(ThreadBindingType::CORES == _impl->_config._threadBindingType)) {
|
||||
_taskArena.reset(new custom::task_arena{concurrency});
|
||||
if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = GetProcessMask();
|
||||
if (nullptr != processMask) {
|
||||
_observer.reset(new Observer{*_taskArena,
|
||||
@ -117,11 +131,12 @@ struct CPUStreamsExecutor::Impl {
|
||||
omp_set_num_threads(_impl->_config._threadsPerStream);
|
||||
if (!checkOpenMpEnvVars(false) && (ThreadBindingType::NONE != _impl->_config._threadBindingType)) {
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = GetProcessMask();
|
||||
if (nullptr != processMask) {
|
||||
parallel_nt(_impl->_config._threadsPerStream, [&] (int threadIndex, int threadsPerStream) {
|
||||
int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex + _impl->_config._threadBindingOffset;
|
||||
parallel_nt(_impl->_config._threadsPerStream, [&](int threadIndex, int threadsPerStream) {
|
||||
int thrIdx = _streamId * _impl->_config._threadsPerStream + threadIndex +
|
||||
_impl->_config._threadBindingOffset;
|
||||
PinThreadToVacantCore(thrIdx, _impl->_config._threadBindingStep, ncpus, processMask);
|
||||
});
|
||||
}
|
||||
@ -131,10 +146,13 @@ struct CPUStreamsExecutor::Impl {
|
||||
PinCurrentThreadToSocket(_numaNodeId);
|
||||
} else if (ThreadBindingType::CORES == _impl->_config._threadBindingType) {
|
||||
CpuSet processMask;
|
||||
int ncpus = 0;
|
||||
int ncpus = 0;
|
||||
std::tie(processMask, ncpus) = GetProcessMask();
|
||||
if (nullptr != processMask) {
|
||||
PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset, _impl->_config._threadBindingStep, ncpus, processMask);
|
||||
PinThreadToVacantCore(_streamId + _impl->_config._threadBindingOffset,
|
||||
_impl->_config._threadBindingStep,
|
||||
ncpus,
|
||||
processMask);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -151,22 +169,22 @@ struct CPUStreamsExecutor::Impl {
|
||||
#endif
|
||||
}
|
||||
|
||||
Impl* _impl = nullptr;
|
||||
int _streamId = 0;
|
||||
Impl* _impl = nullptr;
|
||||
int _streamId = 0;
|
||||
int _numaNodeId = 0;
|
||||
bool _execute = false;
|
||||
std::queue<Task> _taskQueue;
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
std::unique_ptr<custom::task_arena> _taskArena;
|
||||
std::unique_ptr<Observer> _observer;
|
||||
std::unique_ptr<Observer> _observer;
|
||||
#endif
|
||||
};
|
||||
|
||||
explicit Impl(const Config& config) :
|
||||
_config{config},
|
||||
_streams([this] {
|
||||
return std::make_shared<Impl::Stream>(this);
|
||||
}) {
|
||||
explicit Impl(const Config& config)
|
||||
: _config{config},
|
||||
_streams([this] {
|
||||
return std::make_shared<Impl::Stream>(this);
|
||||
}) {
|
||||
auto numaNodes = getAvailableNUMANodes();
|
||||
if (_config._streams != 0) {
|
||||
std::copy_n(std::begin(numaNodes),
|
||||
@ -175,25 +193,28 @@ struct CPUStreamsExecutor::Impl {
|
||||
} else {
|
||||
_usedNumaNodes = numaNodes;
|
||||
}
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
if (ThreadBindingType::HYBRID_AWARE == config._threadBindingType) {
|
||||
const auto core_types = custom::info::core_types();
|
||||
const int threadsPerStream = (0 == config._threadsPerStream) ? std::thread::hardware_concurrency() : config._threadsPerStream;
|
||||
const int threadsPerStream =
|
||||
(0 == config._threadsPerStream) ? std::thread::hardware_concurrency() : config._threadsPerStream;
|
||||
int sum = 0;
|
||||
// reversed order, so BIG cores are first
|
||||
for (auto iter = core_types.rbegin(); iter < core_types.rend(); iter++) {
|
||||
const auto& type = *iter;
|
||||
// calculating the #streams per core type
|
||||
const int num_streams_for_core_type = std::max(1,
|
||||
custom::info::default_concurrency(
|
||||
custom::task_arena::constraints{}.set_core_type(type)) / threadsPerStream);
|
||||
const int num_streams_for_core_type =
|
||||
std::max(1,
|
||||
custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(type)) /
|
||||
threadsPerStream);
|
||||
sum += num_streams_for_core_type;
|
||||
// prefix sum, so the core type for a given stream id will be deduced just as a upper_bound
|
||||
// (notice that the map keeps the elements in the descending order, so the big cores are populated first)
|
||||
// (notice that the map keeps the elements in the descending order, so the big cores are populated
|
||||
// first)
|
||||
total_streams_on_core_types.push_back({type, sum});
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
for (auto streamId = 0; streamId < _config._streams; ++streamId) {
|
||||
_threads.emplace_back([this, streamId] {
|
||||
openvino::itt::threadName(_config._name + "_" + std::to_string(streamId));
|
||||
@ -201,7 +222,9 @@ struct CPUStreamsExecutor::Impl {
|
||||
Task task;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(_mutex);
|
||||
_queueCondVar.wait(lock, [&] { return !_taskQueue.empty() || (stopped = _isStopped); });
|
||||
_queueCondVar.wait(lock, [&] {
|
||||
return !_taskQueue.empty() || (stopped = _isStopped);
|
||||
});
|
||||
if (!_taskQueue.empty()) {
|
||||
task = std::move(_taskQueue.front());
|
||||
_taskQueue.pop();
|
||||
@ -246,33 +269,33 @@ struct CPUStreamsExecutor::Impl {
|
||||
Execute(stream._taskQueue.front(), stream);
|
||||
stream._taskQueue.pop();
|
||||
}
|
||||
} catch(...) {}
|
||||
} catch (...) {
|
||||
}
|
||||
stream._execute = false;
|
||||
}
|
||||
}
|
||||
|
||||
Config _config;
|
||||
std::mutex _streamIdMutex;
|
||||
int _streamId = 0;
|
||||
std::queue<int> _streamIdQueue;
|
||||
std::vector<std::thread> _threads;
|
||||
std::mutex _mutex;
|
||||
std::condition_variable _queueCondVar;
|
||||
std::queue<Task> _taskQueue;
|
||||
bool _isStopped = false;
|
||||
std::vector<int> _usedNumaNodes;
|
||||
ThreadLocal<std::shared_ptr<Stream>> _streams;
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
Config _config;
|
||||
std::mutex _streamIdMutex;
|
||||
int _streamId = 0;
|
||||
std::queue<int> _streamIdQueue;
|
||||
std::vector<std::thread> _threads;
|
||||
std::mutex _mutex;
|
||||
std::condition_variable _queueCondVar;
|
||||
std::queue<Task> _taskQueue;
|
||||
bool _isStopped = false;
|
||||
std::vector<int> _usedNumaNodes;
|
||||
ThreadLocal<std::shared_ptr<Stream>> _streams;
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
// stream id mapping to the core type
|
||||
// stored in the reversed order (so the big cores, with the highest core_type_id value, are populated first)
|
||||
// every entry is the core type and #streams that this AND ALL EARLIER entries can handle (prefix sum)
|
||||
// (so mapping is actually just an upper_bound: core type is deduced from the entry for which the id < #streams)
|
||||
using StreamIdToCoreTypes = std::vector<std::pair<custom::core_type_id, int>>;
|
||||
StreamIdToCoreTypes total_streams_on_core_types;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
int CPUStreamsExecutor::GetStreamId() {
|
||||
auto stream = _impl->_streams.local();
|
||||
return stream->_streamId;
|
||||
@ -283,9 +306,7 @@ int CPUStreamsExecutor::GetNumaNodeId() {
|
||||
return stream->_numaNodeId;
|
||||
}
|
||||
|
||||
CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) :
|
||||
_impl{new Impl{config}} {
|
||||
}
|
||||
CPUStreamsExecutor::CPUStreamsExecutor(const IStreamsExecutor::Config& config) : _impl{new Impl{config}} {}
|
||||
|
||||
CPUStreamsExecutor::~CPUStreamsExecutor() {
|
||||
{
|
||||
|
@ -2,11 +2,12 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "threading/ie_executor_manager.hpp"
|
||||
#include "threading/ie_cpu_streams_executor.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -30,15 +31,14 @@ IStreamsExecutor::Ptr ExecutorManagerImpl::getIdleCPUStreamsExecutor(const IStre
|
||||
continue;
|
||||
|
||||
const auto& executorConfig = it.first;
|
||||
if (executorConfig._name == config._name &&
|
||||
executorConfig._streams == config._streams &&
|
||||
if (executorConfig._name == config._name && executorConfig._streams == config._streams &&
|
||||
executorConfig._threadsPerStream == config._threadsPerStream &&
|
||||
executorConfig._threadBindingType == config._threadBindingType &&
|
||||
executorConfig._threadBindingStep == config._threadBindingStep &&
|
||||
executorConfig._threadBindingOffset == config._threadBindingOffset)
|
||||
if (executorConfig._threadBindingType != IStreamsExecutor::ThreadBindingType::HYBRID_AWARE
|
||||
|| executorConfig._threadPreferredCoreType == config._threadPreferredCoreType)
|
||||
return executor;
|
||||
if (executorConfig._threadBindingType != IStreamsExecutor::ThreadBindingType::HYBRID_AWARE ||
|
||||
executorConfig._threadPreferredCoreType == config._threadPreferredCoreType)
|
||||
return executor;
|
||||
}
|
||||
auto newExec = std::make_shared<CPUStreamsExecutor>(config);
|
||||
cpuStreamsExecutors.emplace_back(std::make_pair(config, newExec));
|
||||
@ -64,9 +64,10 @@ void ExecutorManagerImpl::clear(const std::string& id) {
|
||||
} else {
|
||||
executors.erase(id);
|
||||
cpuStreamsExecutors.erase(
|
||||
std::remove_if(cpuStreamsExecutors.begin(), cpuStreamsExecutors.end(),
|
||||
std::remove_if(cpuStreamsExecutors.begin(),
|
||||
cpuStreamsExecutors.end(),
|
||||
[&](const std::pair<IStreamsExecutor::Config, IStreamsExecutor::Ptr>& it) {
|
||||
return it.first._name == id;
|
||||
return it.first._name == id;
|
||||
}),
|
||||
cpuStreamsExecutors.end());
|
||||
}
|
||||
|
@ -3,17 +3,18 @@
|
||||
//
|
||||
|
||||
#include "threading/ie_istreams_executor.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
#include "ie_parallel.hpp"
|
||||
#include "ie_parallel_custom_arena.hpp"
|
||||
#include "ie_system_conf.h"
|
||||
#include "ie_parameter.hpp"
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_system_conf.h"
|
||||
|
||||
namespace InferenceEngine {
|
||||
IStreamsExecutor::~IStreamsExecutor() {}
|
||||
@ -28,98 +29,98 @@ std::vector<std::string> IStreamsExecutor::Config::SupportedKeys() {
|
||||
}
|
||||
|
||||
void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::string& value) {
|
||||
if (key == CONFIG_KEY(CPU_BIND_THREAD)) {
|
||||
if (value == CONFIG_VALUE(YES) || value == CONFIG_VALUE(NUMA)) {
|
||||
#if (defined(__APPLE__) || defined(_WIN32))
|
||||
_threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA;
|
||||
#else
|
||||
_threadBindingType = (value == CONFIG_VALUE(YES))
|
||||
? IStreamsExecutor::ThreadBindingType::CORES : IStreamsExecutor::ThreadBindingType::NUMA;
|
||||
#endif
|
||||
} else if (value == CONFIG_VALUE(HYBRID_AWARE)) {
|
||||
_threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE;
|
||||
} else if (value == CONFIG_VALUE(NO)) {
|
||||
_threadBindingType = IStreamsExecutor::ThreadBindingType::NONE;
|
||||
} else {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_BIND_THREAD)
|
||||
<< ". Expected only YES(binds to cores) / NO(no binding) / NUMA(binds to NUMA nodes) / "
|
||||
"HYBRID_AWARE (let the runtime recognize and use the hybrid cores)";
|
||||
}
|
||||
} else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) {
|
||||
if (value == CONFIG_VALUE(CPU_THROUGHPUT_NUMA)) {
|
||||
_streams = static_cast<int>(getAvailableNUMANodes().size());
|
||||
} else if (value == CONFIG_VALUE(CPU_THROUGHPUT_AUTO)) {
|
||||
const int sockets = static_cast<int>(getAvailableNUMANodes().size());
|
||||
// bare minimum of streams (that evenly divides available number of cores)
|
||||
const int num_cores = sockets == 1 ? std::thread::hardware_concurrency() : getNumberOfCPUCores();
|
||||
if (0 == num_cores % 4)
|
||||
_streams = std::max(4, num_cores / 4);
|
||||
else if (0 == num_cores % 5)
|
||||
_streams = std::max(5, num_cores / 5);
|
||||
else if (0 == num_cores % 3)
|
||||
_streams = std::max(3, num_cores / 3);
|
||||
else // if user disables some cores say in BIOS, so we got weird #cores which is not easy to divide
|
||||
_streams = 1;
|
||||
} else {
|
||||
int val_i;
|
||||
try {
|
||||
val_i = std::stoi(value);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS)
|
||||
<< ". Expected only positive numbers (#streams) or "
|
||||
<< "PluginConfigParams::CPU_THROUGHPUT_NUMA/CPU_THROUGHPUT_AUTO";
|
||||
}
|
||||
if (val_i < 0) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS)
|
||||
<< ". Expected only positive numbers (#streams)";
|
||||
}
|
||||
_streams = val_i;
|
||||
}
|
||||
} else if (key == CONFIG_KEY(CPU_THREADS_NUM)) {
|
||||
int val_i;
|
||||
try {
|
||||
val_i = std::stoi(value);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM)
|
||||
<< ". Expected only positive numbers (#threads)";
|
||||
}
|
||||
if (val_i < 0) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM)
|
||||
<< ". Expected only positive numbers (#threads)";
|
||||
}
|
||||
_threads = val_i;
|
||||
} else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) {
|
||||
int val_i;
|
||||
try {
|
||||
val_i = std::stoi(value);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)
|
||||
<< ". Expected only non negative numbers (#threads)";
|
||||
}
|
||||
if (val_i < 0) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)
|
||||
<< ". Expected only non negative numbers (#threads)";
|
||||
}
|
||||
_threadsPerStream = val_i;
|
||||
if (key == CONFIG_KEY(CPU_BIND_THREAD)) {
|
||||
if (value == CONFIG_VALUE(YES) || value == CONFIG_VALUE(NUMA)) {
|
||||
#if (defined(__APPLE__) || defined(_WIN32))
|
||||
_threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA;
|
||||
#else
|
||||
_threadBindingType = (value == CONFIG_VALUE(YES)) ? IStreamsExecutor::ThreadBindingType::CORES
|
||||
: IStreamsExecutor::ThreadBindingType::NUMA;
|
||||
#endif
|
||||
} else if (value == CONFIG_VALUE(HYBRID_AWARE)) {
|
||||
_threadBindingType = IStreamsExecutor::ThreadBindingType::HYBRID_AWARE;
|
||||
} else if (value == CONFIG_VALUE(NO)) {
|
||||
_threadBindingType = IStreamsExecutor::ThreadBindingType::NONE;
|
||||
} else {
|
||||
IE_THROW() << "Wrong value for property key " << key;
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_BIND_THREAD)
|
||||
<< ". Expected only YES(binds to cores) / NO(no binding) / NUMA(binds to NUMA nodes) / "
|
||||
"HYBRID_AWARE (let the runtime recognize and use the hybrid cores)";
|
||||
}
|
||||
} else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) {
|
||||
if (value == CONFIG_VALUE(CPU_THROUGHPUT_NUMA)) {
|
||||
_streams = static_cast<int>(getAvailableNUMANodes().size());
|
||||
} else if (value == CONFIG_VALUE(CPU_THROUGHPUT_AUTO)) {
|
||||
const int sockets = static_cast<int>(getAvailableNUMANodes().size());
|
||||
// bare minimum of streams (that evenly divides available number of cores)
|
||||
const int num_cores = sockets == 1 ? std::thread::hardware_concurrency() : getNumberOfCPUCores();
|
||||
if (0 == num_cores % 4)
|
||||
_streams = std::max(4, num_cores / 4);
|
||||
else if (0 == num_cores % 5)
|
||||
_streams = std::max(5, num_cores / 5);
|
||||
else if (0 == num_cores % 3)
|
||||
_streams = std::max(3, num_cores / 3);
|
||||
else // if user disables some cores say in BIOS, so we got weird #cores which is not easy to divide
|
||||
_streams = 1;
|
||||
} else {
|
||||
int val_i;
|
||||
try {
|
||||
val_i = std::stoi(value);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS)
|
||||
<< ". Expected only positive numbers (#streams) or "
|
||||
<< "PluginConfigParams::CPU_THROUGHPUT_NUMA/CPU_THROUGHPUT_AUTO";
|
||||
}
|
||||
if (val_i < 0) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THROUGHPUT_STREAMS)
|
||||
<< ". Expected only positive numbers (#streams)";
|
||||
}
|
||||
_streams = val_i;
|
||||
}
|
||||
} else if (key == CONFIG_KEY(CPU_THREADS_NUM)) {
|
||||
int val_i;
|
||||
try {
|
||||
val_i = std::stoi(value);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM)
|
||||
<< ". Expected only positive numbers (#threads)";
|
||||
}
|
||||
if (val_i < 0) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY(CPU_THREADS_NUM)
|
||||
<< ". Expected only positive numbers (#threads)";
|
||||
}
|
||||
_threads = val_i;
|
||||
} else if (key == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) {
|
||||
int val_i;
|
||||
try {
|
||||
val_i = std::stoi(value);
|
||||
} catch (const std::exception&) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)
|
||||
<< ". Expected only non negative numbers (#threads)";
|
||||
}
|
||||
if (val_i < 0) {
|
||||
IE_THROW() << "Wrong value for property key " << CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)
|
||||
<< ". Expected only non negative numbers (#threads)";
|
||||
}
|
||||
_threadsPerStream = val_i;
|
||||
} else {
|
||||
IE_THROW() << "Wrong value for property key " << key;
|
||||
}
|
||||
}
|
||||
|
||||
Parameter IStreamsExecutor::Config::GetConfig(const std::string& key) {
|
||||
if (key == CONFIG_KEY(CPU_BIND_THREAD)) {
|
||||
switch (_threadBindingType) {
|
||||
case IStreamsExecutor::ThreadBindingType::NONE:
|
||||
return {CONFIG_VALUE(NO)};
|
||||
case IStreamsExecutor::ThreadBindingType::NONE:
|
||||
return {CONFIG_VALUE(NO)};
|
||||
break;
|
||||
case IStreamsExecutor::ThreadBindingType::CORES:
|
||||
return {CONFIG_VALUE(YES)};
|
||||
case IStreamsExecutor::ThreadBindingType::CORES:
|
||||
return {CONFIG_VALUE(YES)};
|
||||
break;
|
||||
case IStreamsExecutor::ThreadBindingType::NUMA:
|
||||
return {CONFIG_VALUE(NUMA)};
|
||||
case IStreamsExecutor::ThreadBindingType::NUMA:
|
||||
return {CONFIG_VALUE(NUMA)};
|
||||
break;
|
||||
case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE:
|
||||
return {CONFIG_VALUE(HYBRID_AWARE)};
|
||||
case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE:
|
||||
return {CONFIG_VALUE(HYBRID_AWARE)};
|
||||
break;
|
||||
}
|
||||
} else if (key == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) {
|
||||
@ -134,7 +135,8 @@ Parameter IStreamsExecutor::Config::GetConfig(const std::string& key) {
|
||||
return {};
|
||||
}
|
||||
|
||||
IStreamsExecutor::Config IStreamsExecutor::Config::MakeDefaultMultiThreaded(const IStreamsExecutor::Config& initial, const bool fp_intesive) {
|
||||
IStreamsExecutor::Config IStreamsExecutor::Config::MakeDefaultMultiThreaded(const IStreamsExecutor::Config& initial,
|
||||
const bool fp_intesive) {
|
||||
const auto envThreads = parallel_get_env_threads();
|
||||
const auto& numaNodes = getAvailableNUMANodes();
|
||||
const int numaNodesNum = numaNodes.size();
|
||||
@ -143,47 +145,49 @@ IStreamsExecutor::Config IStreamsExecutor::Config::MakeDefaultMultiThreaded(cons
|
||||
|
||||
// by default, do not use the hyper-threading (to minimize threads synch overheads)
|
||||
int num_cores_default = getNumberOfCPUCores();
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
//additional latency-case logic for hybrid processors:
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
// additional latency-case logic for hybrid processors:
|
||||
if (ThreadBindingType::HYBRID_AWARE == streamExecutorConfig._threadBindingType) {
|
||||
const auto core_types = custom::info::core_types();
|
||||
const auto num_little_cores = custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.front()));
|
||||
const auto num_little_cores =
|
||||
custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.front()));
|
||||
const auto num_big_cores_phys = getNumberOfCPUCores(true);
|
||||
const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores;
|
||||
const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores;
|
||||
const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores;
|
||||
const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores;
|
||||
// by default the latency case uses (faster) Big cores only, depending on the compute ratio
|
||||
const bool bLatencyCaseBigOnly = num_big_cores_phys > (num_little_cores / (fp_intesive ? fp32_threshold : int8_threshold));
|
||||
const bool bLatencyCaseBigOnly =
|
||||
num_big_cores_phys > (num_little_cores / (fp_intesive ? fp32_threshold : int8_threshold));
|
||||
// selecting the preferred core type
|
||||
streamExecutorConfig._threadPreferredCoreType =
|
||||
bLatencyCase
|
||||
? (bLatencyCaseBigOnly
|
||||
? IStreamsExecutor::Config::PreferredCoreType::BIG
|
||||
: IStreamsExecutor::Config::PreferredCoreType::ANY)
|
||||
: IStreamsExecutor::Config::PreferredCoreType::ROUND_ROBIN;
|
||||
bLatencyCase ? (bLatencyCaseBigOnly ? IStreamsExecutor::Config::PreferredCoreType::BIG
|
||||
: IStreamsExecutor::Config::PreferredCoreType::ANY)
|
||||
: IStreamsExecutor::Config::PreferredCoreType::ROUND_ROBIN;
|
||||
// additionally selecting the #cores to use in the "Big-only" case
|
||||
if (bLatencyCaseBigOnly) {
|
||||
const int hyper_threading_threshold = 2; // min #cores, for which the hyper-threading becomes useful for the latency case
|
||||
const auto num_big_cores = custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.back()));
|
||||
const int hyper_threading_threshold =
|
||||
2; // min #cores, for which the hyper-threading becomes useful for the latency case
|
||||
const auto num_big_cores =
|
||||
custom::info::default_concurrency(custom::task_arena::constraints{}.set_core_type(core_types.back()));
|
||||
num_cores_default = (num_big_cores_phys <= hyper_threading_threshold) ? num_big_cores : num_big_cores_phys;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
const auto hwCores = !bLatencyCase && numaNodesNum == 1
|
||||
// throughput case on a single-NUMA node machine uses all available cores
|
||||
? parallel_get_max_threads()
|
||||
// in the rest of cases:
|
||||
// multi-node machine
|
||||
// or
|
||||
// latency case, single-node yet hybrid case that uses
|
||||
// all core types
|
||||
// or
|
||||
// big-cores only, but the #cores is "enough" (pls see the logic above)
|
||||
// it is usually beneficial not to use the hyper-threading (which is default)
|
||||
: num_cores_default;
|
||||
const auto threads = streamExecutorConfig._threads ? streamExecutorConfig._threads : (envThreads ? envThreads : hwCores);
|
||||
streamExecutorConfig._threadsPerStream = streamExecutorConfig._streams
|
||||
? std::max(1, threads/streamExecutorConfig._streams)
|
||||
: threads;
|
||||
// throughput case on a single-NUMA node machine uses all available cores
|
||||
? parallel_get_max_threads()
|
||||
// in the rest of cases:
|
||||
// multi-node machine
|
||||
// or
|
||||
// latency case, single-node yet hybrid case that uses
|
||||
// all core types
|
||||
// or
|
||||
// big-cores only, but the #cores is "enough" (pls see the logic above)
|
||||
// it is usually beneficial not to use the hyper-threading (which is default)
|
||||
: num_cores_default;
|
||||
const auto threads =
|
||||
streamExecutorConfig._threads ? streamExecutorConfig._threads : (envThreads ? envThreads : hwCores);
|
||||
streamExecutorConfig._threadsPerStream =
|
||||
streamExecutorConfig._streams ? std::max(1, threads / streamExecutorConfig._streams) : threads;
|
||||
return streamExecutorConfig;
|
||||
}
|
||||
|
||||
|
@ -15,11 +15,15 @@ void ITaskExecutor::runAndWait(const std::vector<Task>& tasks) {
|
||||
std::vector<std::packaged_task<void()>> packagedTasks;
|
||||
std::vector<std::future<void>> futures;
|
||||
for (std::size_t i = 0; i < tasks.size(); ++i) {
|
||||
packagedTasks.emplace_back([&tasks, i] {tasks[i]();});
|
||||
packagedTasks.emplace_back([&tasks, i] {
|
||||
tasks[i]();
|
||||
});
|
||||
futures.emplace_back(packagedTasks.back().get_future());
|
||||
}
|
||||
for (std::size_t i = 0; i < tasks.size(); ++i) {
|
||||
run([&packagedTasks, i]{packagedTasks[i]();});
|
||||
run([&packagedTasks, i] {
|
||||
packagedTasks[i]();
|
||||
});
|
||||
}
|
||||
// std::future::get will rethrow exception from task.
|
||||
// We should wait all tasks before any exception is thrown.
|
||||
|
198
inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp
Executable file → Normal file
198
inference-engine/src/inference_engine/src/threading/ie_parallel_custom_arena.cpp
Executable file → Normal file
@ -6,29 +6,32 @@
|
||||
|
||||
#if IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO
|
||||
|
||||
#ifndef TBBBIND_2_4_AVAILABLE
|
||||
# define TBBBIND_2_4_AVAILABLE 0
|
||||
#endif
|
||||
# ifndef TBBBIND_2_4_AVAILABLE
|
||||
# define TBBBIND_2_4_AVAILABLE 0
|
||||
# endif
|
||||
|
||||
#define USE_TBBBIND_2_4 (TBBBIND_2_4_AVAILABLE && TBB_INTERFACE_VERSION < 12020)
|
||||
#define TBB_NUMA_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 11100)
|
||||
#define TBB_HYBRID_CPUS_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 12020)
|
||||
# define USE_TBBBIND_2_4 (TBBBIND_2_4_AVAILABLE && TBB_INTERFACE_VERSION < 12020)
|
||||
# define TBB_NUMA_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 11100)
|
||||
# define TBB_HYBRID_CPUS_SUPPORT_PRESENT (TBB_INTERFACE_VERSION >= 12020)
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
# if defined(_WIN32) || defined(_WIN64)
|
||||
# include <windows.h>
|
||||
# endif
|
||||
|
||||
namespace custom {
|
||||
namespace detail {
|
||||
|
||||
#if USE_TBBBIND_2_4
|
||||
# if USE_TBBBIND_2_4
|
||||
extern "C" {
|
||||
void __TBB_internal_initialize_system_topology(
|
||||
std::size_t groups_num,
|
||||
int& numa_nodes_count, int*& numa_indexes_list,
|
||||
int& core_types_count, int*& core_types_indexes_list
|
||||
);
|
||||
binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots, int numa_id, int core_type_id, int max_threads_per_core);
|
||||
void __TBB_internal_initialize_system_topology(std::size_t groups_num,
|
||||
int& numa_nodes_count,
|
||||
int*& numa_indexes_list,
|
||||
int& core_types_count,
|
||||
int*& core_types_indexes_list);
|
||||
binding_handler* __TBB_internal_allocate_binding_handler(int number_of_slots,
|
||||
int numa_id,
|
||||
int core_type_id,
|
||||
int max_threads_per_core);
|
||||
void __TBB_internal_deallocate_binding_handler(binding_handler* handler_ptr);
|
||||
void __TBB_internal_apply_affinity(binding_handler* handler_ptr, int slot_num);
|
||||
void __TBB_internal_restore_affinity(binding_handler* handler_ptr, int slot_num);
|
||||
@ -36,7 +39,7 @@ int __TBB_internal_get_default_concurrency(int numa_id, int core_type_id, int ma
|
||||
}
|
||||
|
||||
int get_processors_group_num() {
|
||||
#if defined(_WIN32) || defined(_WIN64)
# if defined(_WIN32) || defined(_WIN64)
SYSTEM_INFO si;
GetNativeSystemInfo(&si);

@ -44,46 +47,48 @@ int get_processors_group_num() {
GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam);
int nproc = 0;
for (std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1) {
if ( pam & m )
if (pam & m)
++nproc;
}
if (nproc == static_cast<int>(si.dwNumberOfProcessors)) {
return GetActiveProcessorGroupCount();
}
#endif
# endif
return 1;
}

bool is_binding_environment_valid() {
#if defined(_WIN32) && !defined(_WIN64)
# if defined(_WIN32) && !defined(_WIN64)
static bool result = [] {
// For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
SYSTEM_INFO si;
GetNativeSystemInfo(&si);
if (si.dwNumberOfProcessors > 32) return false;
if (si.dwNumberOfProcessors > 32)
return false;
return true;
}();
return result;
#else
# else
return true;
#endif /* _WIN32 && !_WIN64 */
# endif /* _WIN32 && !_WIN64 */
}

static int numa_nodes_count = 0;
static int numa_nodes_count = 0;
static int* numa_nodes_indexes = nullptr;

static int core_types_count = 0;
static int core_types_count = 0;
static int* core_types_indexes = nullptr;

void initialize_system_topology() {
static std::once_flag is_topology_initialized;

std::call_once(is_topology_initialized, [&]{
std::call_once(is_topology_initialized, [&] {
if (is_binding_environment_valid()) {
__TBB_internal_initialize_system_topology(
get_processors_group_num(),
numa_nodes_count, numa_nodes_indexes,
core_types_count, core_types_indexes);
__TBB_internal_initialize_system_topology(get_processors_group_num(),
numa_nodes_count,
numa_nodes_indexes,
core_types_count,
core_types_indexes);
} else {
static int dummy_index = task_arena::automatic;

@ -99,7 +104,8 @@ void initialize_system_topology() {
binding_observer::binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c)
: task_scheduler_observer(ta) {
detail::initialize_system_topology();
my_binding_handler = detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core);
my_binding_handler =
detail::__TBB_internal_allocate_binding_handler(num_slots, c.numa_id, c.core_type, c.max_threads_per_core);
}

binding_observer::~binding_observer() {
@ -117,89 +123,91 @@ void binding_observer::on_scheduler_exit(bool) {
binding_oberver_ptr construct_binding_observer(tbb::task_arena& ta, int num_slots, const constraints& c) {
binding_oberver_ptr observer{};
if (detail::is_binding_environment_valid() &&
((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) || c.max_threads_per_core > 0)) {
((c.core_type >= 0 && info::core_types().size() > 1) || (c.numa_id >= 0 && info::numa_nodes().size() > 1) ||
c.max_threads_per_core > 0)) {
observer.reset(new binding_observer{ta, num_slots, c});
observer->observe(true);
}
return observer;
}

#endif /*USE_TBBBIND_2_4*/
# endif /*USE_TBBBIND_2_4*/

#if TBB_NUMA_SUPPORT_PRESENT
# if TBB_NUMA_SUPPORT_PRESENT
tbb::task_arena::constraints convert_constraints(const custom::task_arena::constraints& c) {
tbb::task_arena::constraints result{};
#if TBB_HYBRID_CPUS_SUPPORT_PRESENT
# if TBB_HYBRID_CPUS_SUPPORT_PRESENT
result.core_type = c.core_type;
result.max_threads_per_core = c.max_threads_per_core;
#endif
# endif
result.numa_id = c.numa_id;
result.max_concurrency = c.max_concurrency;
return result;
}
#endif
} // namespace detail
# endif
} // namespace detail

task_arena::task_arena(int max_concurrency_, unsigned reserved_for_masters)
: my_task_arena{max_concurrency_, reserved_for_masters}
, my_initialization_state{}
, my_constraints{}
, my_binding_observer{}
{}
: my_task_arena{max_concurrency_, reserved_for_masters},
my_initialization_state{},
my_constraints{},
my_binding_observer{} {}

task_arena::task_arena(const constraints& constraints_, unsigned reserved_for_masters)
#if USE_TBBBIND_2_4
: my_task_arena {info::default_concurrency(constraints_), reserved_for_masters}
#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
: my_task_arena {convert_constraints(constraints_), reserved_for_masters}
#else
: my_task_arena {constraints_.max_concurrency, reserved_for_masters}
#endif
, my_initialization_state{}
, my_constraints{constraints_}
, my_binding_observer{}
{}
# if USE_TBBBIND_2_4
: my_task_arena {
info::default_concurrency(constraints_), reserved_for_masters
}
# elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
: my_task_arena {
convert_constraints(constraints_), reserved_for_masters
}
# else
: my_task_arena {
constraints_.max_concurrency, reserved_for_masters
}
# endif
, my_initialization_state{}, my_constraints{constraints_}, my_binding_observer{} {}

task_arena::task_arena(const task_arena &s)
: my_task_arena{s.my_task_arena}
, my_initialization_state{}
, my_constraints{s.my_constraints}
, my_binding_observer{}
{}
task_arena::task_arena(const task_arena& s)
: my_task_arena{s.my_task_arena},
my_initialization_state{},
my_constraints{s.my_constraints},
my_binding_observer{} {}

void task_arena::initialize() {
my_task_arena.initialize();
#if USE_TBBBIND_2_4
# if USE_TBBBIND_2_4
std::call_once(my_initialization_state, [this] {
my_binding_observer = detail::construct_binding_observer(
my_task_arena, my_task_arena.max_concurrency(), my_constraints);
my_binding_observer =
detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints);
});
#endif
# endif
}

void task_arena::initialize(int max_concurrency_, unsigned reserved_for_masters) {
my_task_arena.initialize(max_concurrency_, reserved_for_masters);
#if USE_TBBBIND_2_4
# if USE_TBBBIND_2_4
std::call_once(my_initialization_state, [this] {
my_binding_observer = detail::construct_binding_observer(
my_task_arena, my_task_arena.max_concurrency(), my_constraints);
my_binding_observer =
detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints);
});
#endif
# endif
}

void task_arena::initialize(constraints constraints_, unsigned reserved_for_masters) {
my_constraints = constraints_;
#if USE_TBBBIND_2_4
my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters);
std::call_once(my_initialization_state, [this] {
my_binding_observer = detail::construct_binding_observer(
my_task_arena, my_task_arena.max_concurrency(), my_constraints);
});
#elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters);
#else
my_task_arena.initialize(my_constraints.max_concurrency, reserved_for_masters);
#endif
my_constraints = constraints_;
# if USE_TBBBIND_2_4
my_task_arena.initialize(info::default_concurrency(constraints_), reserved_for_masters);
std::call_once(my_initialization_state, [this] {
my_binding_observer =
detail::construct_binding_observer(my_task_arena, my_task_arena.max_concurrency(), my_constraints);
});
# elif TBB_NUMA_SUPPORT_PRESENT || TBB_HYBRID_CPUS_SUPPORT_PRESENT
my_task_arena.initialize(convert_constraints(my_constraints), reserved_for_masters);
# else
my_task_arena.initialize(my_constraints.max_concurrency, reserved_for_masters);
# endif
}

task_arena::operator tbb::task_arena&() {
@ -213,54 +221,54 @@ int task_arena::max_concurrency() {

namespace info {
std::vector<numa_node_id> numa_nodes() {
#if USE_TBBBIND_2_4
# if USE_TBBBIND_2_4
detail::initialize_system_topology();
std::vector<numa_node_id> node_indexes(detail::numa_nodes_count);
std::memcpy(node_indexes.data(), detail::numa_nodes_indexes, detail::numa_nodes_count * sizeof(int));
return node_indexes;
#elif TBB_NUMA_SUPPORT_PRESENT
# elif TBB_NUMA_SUPPORT_PRESENT
return tbb::info::numa_nodes();
#else
# else
return {tbb::task_arena::automatic};
#endif
# endif
}

std::vector<core_type_id> core_types() {
#if USE_TBBBIND_2_4
# if USE_TBBBIND_2_4
detail::initialize_system_topology();
std::vector<numa_node_id> core_type_indexes(detail::core_types_count);
std::memcpy(core_type_indexes.data(), detail::core_types_indexes, detail::core_types_count * sizeof(int));
return core_type_indexes;
#elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
# elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
return tbb::info::core_types();
#else
# else
return {tbb::task_arena::automatic};
#endif
# endif
}

int default_concurrency(task_arena::constraints c) {
if (c.max_concurrency > 0) {
return c.max_concurrency;
}
#if USE_TBBBIND_2_4
# if USE_TBBBIND_2_4
if (detail::is_binding_environment_valid()) {
detail::initialize_system_topology();
return detail::__TBB_internal_get_default_concurrency(c.numa_id, c.core_type, c.max_threads_per_core);
}
return tbb::this_task_arena::max_concurrency();
#elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
# elif TBB_HYBRID_CPUS_SUPPORT_PRESENT
return tbb::info::default_concurrency(convert_constraints(c));
#elif TBB_NUMA_SUPPORT_PRESENT
# elif TBB_NUMA_SUPPORT_PRESENT
return tbb::info::default_concurrency(c.numa_id);
#else
# else
return tbb::this_task_arena::max_concurrency();
#endif
# endif
}

int default_concurrency(numa_node_id id) {
return default_concurrency(task_arena::constraints{}.set_numa_id(id));
}

} // namespace info
} // namespace custom
} // namespace info
} // namespace custom
#endif /*IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO*/
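For orientation, a minimal sketch of how the custom::task_arena wrapper above is meant to be consumed (illustrative only, not part of this commit; it relies on the constraints::set_numa_id() call and the operator tbb::task_arena&() conversion shown in the hunks above, and assumes TBB is available):

// Illustrative sketch: run work inside an arena constrained to NUMA node 0.
custom::task_arena arena{custom::task_arena::constraints{}.set_numa_id(0)};
arena.initialize();  // binds the arena threads when USE_TBBBIND_2_4 is enabled
static_cast<tbb::task_arena&>(arena).execute([] {
    // parallel work scheduled on the pinned arena
});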
@ -3,16 +3,17 @@
//

#include "threading/ie_thread_affinity.hpp"
#include "ie_system_conf.h"
#include <climits>
#include <cerrno>
#include <utility>
#include <tuple>

#include <cerrno>
#include <climits>
#include <tuple>
#include <utility>

#include "ie_system_conf.h"

#if !(defined(__APPLE__) || defined(_WIN32))
#include <sched.h>
#include <unistd.h>
# include <sched.h>
# include <unistd.h>
#endif

namespace InferenceEngine {
@ -20,7 +21,8 @@ namespace InferenceEngine {
std::tuple<CpuSet, int> GetProcessMask() {
for (int ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
CpuSet mask{CPU_ALLOC(ncpus)};
if (nullptr == mask) break;
if (nullptr == mask)
break;
const size_t size = CPU_ALLOC_SIZE(ncpus);
CPU_ZERO_S(size, mask.get());
// the result fits the mask
@ -28,14 +30,16 @@ std::tuple<CpuSet, int> GetProcessMask() {
return std::make_tuple(std::move(mask), ncpus);
}
// other error
if (errno != EINVAL) break;
if (errno != EINVAL)
break;
}
return std::make_tuple(nullptr, 0);
}

/* Release the cores affinity mask for the current process */
void ReleaseProcessMask(cpu_set_t* mask) {
if (nullptr != mask) CPU_FREE(mask);
if (nullptr != mask)
CPU_FREE(mask);
}

bool PinCurrentThreadByMask(int ncores, const CpuSet& procMask) {
@ -74,7 +78,7 @@ bool PinThreadToVacantCore(int thrIdx, int hyperthreads, int ncores, const CpuSe
bool PinCurrentThreadToSocket(int socket) {
const int sockets = InferenceEngine::getAvailableNUMANodes().size();
const int cores = InferenceEngine::getNumberOfCPUCores();
const int cores_per_socket = cores/sockets;
const int cores_per_socket = cores / sockets;

int ncpus = 0;
CpuSet mask;
@ -83,7 +87,7 @@ bool PinCurrentThreadToSocket(int socket) {
const size_t size = CPU_ALLOC_SIZE(ncpus);
CPU_ZERO_S(size, targetMask.get());

for (int core = socket*cores_per_socket; core < (socket+1)*cores_per_socket; core++) {
for (int core = socket * cores_per_socket; core < (socket + 1) * cores_per_socket; core++) {
CPU_SET_S(core, size, targetMask.get());
}
// respect the user-defined mask for the entire process
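For reference, a short sketch of how these affinity helpers are typically driven on Linux (illustrative only, not part of this commit; it assumes the declarations from ie_thread_affinity.hpp are in scope):

// Illustrative sketch: query the process affinity mask, then pin the current
// thread to socket 0 using the helper defined above.
InferenceEngine::CpuSet mask;
int ncpus = 0;
std::tie(mask, ncpus) = InferenceEngine::GetProcessMask();
if (nullptr != mask) {
    InferenceEngine::PinCurrentThreadToSocket(0);
}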
@ -4,13 +4,13 @@

#pragma once

#include <ie_api.h>

#include <tuple>
#include <memory>
#include <tuple>

#include "ie_api.h"

#if !(defined(__APPLE__) || defined(_WIN32))
#include <sched.h>
# include <sched.h>
#endif

namespace InferenceEngine {
@ -16,14 +16,14 @@ int XMLParseUtils::GetIntAttr(const pugi::xml_node& node, const char* str) {
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
std::string str_value = std::string(attr.value());
std::size_t idx = 0;
int int_value = std::stoi(str_value, &idx, 10);
if (idx != str_value.length())
IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
<< "\" which is not an integer"
<< " at offset " << node.offset_debug();
<< "\" which is not an integer"
<< " at offset " << node.offset_debug();
return int_value;
}

@ -31,14 +31,14 @@ int64_t XMLParseUtils::GetInt64Attr(const pugi::xml_node& node, const char* str)
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
std::string str_value = std::string(attr.value());
std::size_t idx = 0;
long long int_value = std::stoll(str_value, &idx, 10);
if (idx != str_value.length())
IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
<< "\" which is not a signed 64 bit integer"
<< " at offset " << node.offset_debug();
<< "\" which is not a signed 64 bit integer"
<< " at offset " << node.offset_debug();
return static_cast<int64_t>(int_value);
}

@ -46,14 +46,14 @@ uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node& node, const char* st
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
std::string str_value = std::string(attr.value());
std::size_t idx = 0;
long long int_value = std::stoll(str_value, &idx, 10);
if (idx != str_value.length() || int_value < 0)
IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
<< "\" which is not an unsigned 64 bit integer"
<< " at offset " << node.offset_debug();
<< "\" which is not an unsigned 64 bit integer"
<< " at offset " << node.offset_debug();
return static_cast<uint64_t>(int_value);
}

@ -61,14 +61,14 @@ unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node& node, const char*
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
std::string str_value = std::string(attr.value());
std::size_t idx = 0;
long long int_value = std::stoll(str_value, &idx, 10);
if (idx != str_value.length() || int_value < 0 || int_value > (std::numeric_limits<unsigned int>::max)())
IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
<< "\" which is not an unsigned integer"
<< " at offset " << node.offset_debug();
<< "\" which is not an unsigned integer"
<< " at offset " << node.offset_debug();
return static_cast<unsigned int>(int_value);
}

@ -76,25 +76,27 @@ std::string XMLParseUtils::GetStrAttr(const pugi::xml_node& node, const char* st
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: '" << str << "' at offset "
<< node.offset_debug();
<< node.offset_debug();
return attr.value();
}

std::string XMLParseUtils::GetStrAttr(const pugi::xml_node& node, const char* str, const char* def) {
auto attr = node.attribute(str);
if (attr.empty()) return def;
if (attr.empty())
return def;
return attr.value();
}

bool XMLParseUtils::GetBoolAttr(const pugi::xml_node& node, const char* str, const bool def) {
auto attr = node.attribute(str);
if (attr.empty()) return def;
if (attr.empty())
return def;
std::string string_attr = attr.value();
std::transform(string_attr.begin(), string_attr.end(), string_attr.begin(), [](char ch) {
return std::tolower(static_cast<unsigned char>(ch));
});
std::set<std::string> true_names {"true", "1"};
std::set<std::string> false_names {"false", "0"};
std::set<std::string> true_names{"true", "1"};
std::set<std::string> false_names{"false", "0"};

bool is_true = true_names.find(string_attr) != true_names.end();
bool is_false = false_names.find(string_attr) != false_names.end();
@ -110,13 +112,13 @@ bool XMLParseUtils::GetBoolAttr(const pugi::xml_node& node, const char* str) {
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
std::string string_attr = attr.value();
std::transform(string_attr.begin(), string_attr.end(), string_attr.begin(), [](char ch) {
return std::tolower(static_cast<unsigned char>(ch));
});
std::set<std::string> true_names {"true", "1"};
std::set<std::string> false_names {"false", "0"};
std::set<std::string> true_names{"true", "1"};
std::set<std::string> false_names{"false", "0"};

bool is_true = true_names.find(string_attr) != true_names.end();
bool is_false = false_names.find(string_attr) != false_names.end();
@ -132,7 +134,7 @@ float XMLParseUtils::GetFloatAttr(const pugi::xml_node& node, const char* str) {
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
std::string str_value = std::string(attr.value());
std::stringstream str_stream(str_value);
str_stream.imbue(std::locale("C"));
@ -140,8 +142,8 @@ float XMLParseUtils::GetFloatAttr(const pugi::xml_node& node, const char* str) {
str_stream >> float_value;
if (!str_stream.eof())
IE_THROW() << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
<< "\" which is not a floating point"
<< " at offset " << node.offset_debug();
<< "\" which is not a floating point"
<< " at offset " << node.offset_debug();
return float_value;
}

@ -149,49 +151,57 @@ InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node&
auto attr = node.attribute(str);
if (attr.empty())
IE_THROW() << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
<< node.offset_debug();
<< node.offset_debug();
return InferenceEngine::Precision::FromStr(attr.value());
}

InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node& node, const char* str,
InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node& node,
const char* str,
InferenceEngine::Precision def) {
auto attr = node.attribute(str);
if (attr.empty()) return InferenceEngine::Precision(def);
if (attr.empty())
return InferenceEngine::Precision(def);
return InferenceEngine::Precision::FromStr(attr.value());
}

int XMLParseUtils::GetIntAttr(const pugi::xml_node& node, const char* str, int defVal) {
auto attr = node.attribute(str);
if (attr.empty()) return defVal;
if (attr.empty())
return defVal;
return GetIntAttr(node, str);
}

int64_t XMLParseUtils::GetInt64Attr(const pugi::xml_node& node, const char* str, int64_t defVal) {
auto attr = node.attribute(str);
if (attr.empty()) return defVal;
if (attr.empty())
return defVal;
return GetInt64Attr(node, str);
}

uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node& node, const char* str, uint64_t defVal) {
auto attr = node.attribute(str);
if (attr.empty()) return defVal;
if (attr.empty())
return defVal;
return GetUInt64Attr(node, str);
}

unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node& node, const char* str, unsigned int defVal) {
auto attr = node.attribute(str);
if (attr.empty()) return defVal;
if (attr.empty())
return defVal;
return GetUIntAttr(node, str);
}

float XMLParseUtils::GetFloatAttr(const pugi::xml_node& node, const char* str, float defVal) {
auto attr = node.attribute(str);
if (attr.empty()) return defVal;
if (attr.empty())
return defVal;
return GetFloatAttr(node, str);
}

int XMLParseUtils::GetIntChild(const pugi::xml_node& node, const char* str, int defVal) {
auto child = node.child(str);
if (child.empty()) return defVal;
if (child.empty())
return defVal;
return atoi(child.child_value());
}
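As a reminder of how these helpers are used, here is a short sketch of reading attributes from an IR node with and without defaults (illustrative only, not part of this commit; `layer` and the attribute names are hypothetical):

// Illustrative sketch: parse attributes of a pugi::xml_node named `layer`.
std::string name = XMLParseUtils::GetStrAttr(layer, "name");              // throws if missing
int id           = XMLParseUtils::GetIntAttr(layer, "id");                // throws if missing or non-integer
unsigned port    = XMLParseUtils::GetUIntAttr(layer, "from-port", 0);     // falls back to 0 if absent
bool secondary   = XMLParseUtils::GetBoolAttr(layer, "secondary", false); // accepts "true"/"1"/"false"/"0"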
28
inference-engine/src/plugin_api/.clang-format
Normal file
28
inference-engine/src/plugin_api/.clang-format
Normal file
@ -0,0 +1,28 @@
BasedOnStyle: Google
IndentWidth: 4
UseTab: Never
ColumnLimit: 120

Language: Cpp
Standard: Cpp11

AccessModifierOffset: -4
AlignConsecutiveMacros: true
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Empty
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
BinPackArguments: false
BinPackParameters: false
CommentPragmas: '^#'
DerivePointerAlignment: false
FixNamespaceComments: true
IndentCaseLabels: false
IndentPPDirectives: AfterHash
ForEachMacros:
- foreach
- FOREACH_CHILD
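Two of the less common options above drive most of the churn visible in this diff: IndentPPDirectives: AfterHash indents nested preprocessor directives after the '#' (which is why plain '#if'/'#endif' lines turn into '# if'/'# endif' inside conditional regions), and listing FOREACH_CHILD under ForEachMacros makes clang-format lay it out like a range-based loop. A minimal sketch of the resulting style (illustrative code, not part of this commit):

// Nested directives are indented after the hash, one IndentWidth per level.
#if defined(_WIN32)
#    if defined(_WIN64)
// 64-bit Windows branch
#    endif
#endif

// FOREACH_CHILD is formatted as a loop header, not as a function call.
FOREACH_CHILD (child, node, "port") {
    // loop body
}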
@ -13,9 +13,9 @@
#include <utility>
#include <vector>

#include "ie_memcpy.h"
#include "ie_blob.h"
#include "ie_data.h"
#include "ie_memcpy.h"
#include "ie_preprocess.hpp"

/**
@ -101,9 +101,9 @@ make_plain_blob(InferenceEngine::Precision prec, const InferenceEngine::SizeVect
*/
template <class... Args>
InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision precision, Args&&... args) {
#define USE_FACTORY(precision) \
case InferenceEngine::Precision::precision: \
return make_shared_blob2<InferenceEngine::Precision::precision>(std::forward<Args>(args)...);
#define USE_FACTORY(precision) \
case InferenceEngine::Precision::precision: \
return make_shared_blob2<InferenceEngine::Precision::precision>(std::forward<Args>(args)...);

switch (precision) {
USE_FACTORY(FP32);
@ -126,7 +126,7 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision p
default:
IE_THROW() << "cannot locate blob for precision: " << precision;
}
#undef USE_FACTORY
#undef USE_FACTORY
}

/**
@ -138,7 +138,9 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision p
*/
template <typename T>
void CopyVectorToBlob(const InferenceEngine::Blob::Ptr outputBlob, const std::vector<T>& inputVector) {
if (outputBlob->size() != inputVector.size()) IE_THROW() << "Size mismatch between dims and vector";
if (outputBlob->element_size() != sizeof(T)) IE_THROW() << "Element size mismatch between blob and vector";
if (outputBlob->size() != inputVector.size())
IE_THROW() << "Size mismatch between dims and vector";
if (outputBlob->element_size() != sizeof(T))
IE_THROW() << "Element size mismatch between blob and vector";
ie_memcpy(outputBlob->buffer().as<T*>(), outputBlob->byteSize(), &inputVector[0], inputVector.size() * sizeof(T));
}
@ -21,14 +21,17 @@ namespace details {

/**
* @brief Provides caseless comparison for STL algorithms
*
*
* @tparam Key type, usually std::string
*/
template <class Key>
class CaselessLess {
class CaselessLess {
public:
bool operator()(const Key& a, const Key& b) const noexcept {
return std::lexicographical_compare(std::begin(a), std::end(a), std::begin(b), std::end(b),
return std::lexicographical_compare(std::begin(a),
std::end(a),
std::begin(b),
std::end(b),
[](const char& cha, const char& chb) {
return std::tolower(cha) < std::tolower(chb);
});
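Since CaselessLess is documented above as a comparator for STL algorithms, a short usage sketch (illustrative only, not part of this commit; it assumes the enclosing InferenceEngine::details namespace shown in the hunk header):

// Illustrative sketch: a case-insensitive map keyed by layer type.
std::map<std::string, int, InferenceEngine::details::CaselessLess<std::string>> counts;
counts["Convolution"] = 1;
// "convolution" and "Convolution" compare equal under CaselessLess.
auto it = counts.find("convolution");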
@ -9,8 +9,8 @@
#include <string>
#include <vector>

#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
#include "cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp"
#include "cpp_interfaces/interface/ie_iexecutable_network_internal.hpp"
#include "threading/ie_cpu_streams_executor.hpp"

namespace InferenceEngine {
@ -33,14 +33,13 @@ public:
* @param[in] taskExecutor The task executor used
* @param[in] callbackExecutor The callback executor
*/
explicit
ExecutableNetworkThreadSafeDefault(const ITaskExecutor::Ptr& taskExecutor
= std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{"Default"}),
const ITaskExecutor::Ptr& callbackExecutor
= std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{"Callback"})) :
_taskExecutor{taskExecutor},
_callbackExecutor{callbackExecutor} {
}
explicit ExecutableNetworkThreadSafeDefault(
const ITaskExecutor::Ptr& taskExecutor = std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{
"Default"}),
const ITaskExecutor::Ptr& callbackExecutor = std::make_shared<CPUStreamsExecutor>(IStreamsExecutor::Config{
"Callback"}))
: _taskExecutor{taskExecutor},
_callbackExecutor{callbackExecutor} {}

/**
* @brief Given optional implementation of creating asynchronous inference request to avoid
@ -64,7 +63,7 @@ protected:
return std::make_shared<AsyncInferRequestType>(syncRequestImpl, _taskExecutor, _callbackExecutor);
}

ITaskExecutor::Ptr _taskExecutor = nullptr; //!< Holds a task executor
ITaskExecutor::Ptr _taskExecutor = nullptr; //!< Holds a task executor
ITaskExecutor::Ptr _callbackExecutor = nullptr; //!< Holds a callback executor
};
@ -4,12 +4,6 @@

#pragma once

#include <threading/ie_immediate_executor.hpp>
#include <threading/ie_itask_executor.hpp>
#include <threading/ie_istreams_executor.hpp>

#include <cpp_interfaces/interface/ie_iinfer_request_internal.hpp>

#include <exception>
#include <future>
#include <map>
@ -20,6 +14,11 @@
#include <utility>
#include <vector>

#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "threading/ie_immediate_executor.hpp"
#include "threading/ie_istreams_executor.hpp"
#include "threading/ie_itask_executor.hpp"

namespace InferenceEngine {

/**
@ -28,7 +27,8 @@ namespace InferenceEngine {
* To customize pipeline stages derived class should change the content
* of AsyncInferRequestThreadSafeDefault::_pipeline member container.
* It consists of pairs of tasks and executors which will run the task.
* The class is recommended to be used by plugins as a base class for asynchronous inference request implementation.
* The class is recommended to be used by plugins as a base class for asynchronous inference request
* implementation.
* @note To synchronize derived context with stages
* derived class should call AsyncInferRequestThreadSafeDefault::StopAndWait() function in destructor.
* @par Example
@ -38,7 +38,7 @@ namespace InferenceEngine {
* @snippet example_async_infer_request.cpp async_infer_request:define_pipeline
*/
class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal {
enum InferState {Idle, Busy, Canceled, Stop};
enum InferState { Idle, Busy, Canceled, Stop };
using Futures = std::vector<std::shared_future<void>>;
using Promise = std::shared_ptr<std::promise<void>>;
enum Stage_e : std::uint8_t { executor, task };
@ -46,11 +46,10 @@ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal {

friend struct DisableCallbackGuard;
struct DisableCallbackGuard {
explicit DisableCallbackGuard(AsyncInferRequestThreadSafeDefault* this_)
: _this{this_} {
std::lock_guard<std::mutex> lock{_this->_mutex};
std::swap(_callback, _this->_callback);
}
explicit DisableCallbackGuard(AsyncInferRequestThreadSafeDefault* this_) : _this{this_} {
std::lock_guard<std::mutex> lock{_this->_mutex};
std::swap(_callback, _this->_callback);
}
~DisableCallbackGuard() {
std::lock_guard<std::mutex> lock{_this->_mutex};
_this->_callback = _callback;
@ -60,12 +59,15 @@ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal {
};

struct ImmediateStreamsExecutor : public InferenceEngine::ITaskExecutor {
explicit ImmediateStreamsExecutor(const IStreamsExecutor::Ptr& streamsExecutor) : _streamsExecutor{streamsExecutor} {}
void run(InferenceEngine::Task task) override {_streamsExecutor->Execute(std::move(task));}
explicit ImmediateStreamsExecutor(const IStreamsExecutor::Ptr& streamsExecutor)
: _streamsExecutor{streamsExecutor} {}
void run(InferenceEngine::Task task) override {
_streamsExecutor->Execute(std::move(task));
}
IStreamsExecutor::Ptr _streamsExecutor;
};

template<typename F>
template <typename F>
void InferImpl(const F& f) {
_syncRequest->checkBlobs();
InferState state = InferState::Idle;
@ -73,25 +75,27 @@ class AsyncInferRequestThreadSafeDefault : public IInferRequestInternal {
std::lock_guard<std::mutex> lock{_mutex};
state = _state;
switch (_state) {
case InferState::Busy :
case InferState::Busy:
IE_THROW(RequestBusy);
case InferState::Canceled :
case InferState::Canceled:
IE_THROW(InferCancelled);
case InferState::Idle : {
_futures.erase(std::remove_if(std::begin(_futures), std::end(_futures),
[](const std::shared_future<void>& future) {
if (future.valid()) {
return (std::future_status::ready ==
future.wait_for(std::chrono::milliseconds {0}));
} else {
return true;
}
}),
_futures.end());
case InferState::Idle: {
_futures.erase(std::remove_if(std::begin(_futures),
std::end(_futures),
[](const std::shared_future<void>& future) {
if (future.valid()) {
return (std::future_status::ready ==
future.wait_for(std::chrono::milliseconds{0}));
} else {
return true;
}
}),
_futures.end());
_promise = {};
_futures.emplace_back(_promise.get_future().share());
} break;
case InferState::Stop : break;
case InferState::Stop:
break;
}
_state = InferState::Busy;
}
@ -112,13 +116,14 @@ protected:
* @brief Throws exception if inference request is busy or canceled
*/
void CheckState() const {
std::lock_guard<std::mutex> lock {_mutex};
std::lock_guard<std::mutex> lock{_mutex};
switch (_state) {
case InferState::Busy :
case InferState::Busy:
IE_THROW(RequestBusy);
case InferState::Canceled :
case InferState::Canceled:
IE_THROW(InferCancelled);
default: break;
default:
break;
}
}

@ -139,15 +144,22 @@ public:
*/
AsyncInferRequestThreadSafeDefault(const IInferRequestInternal::Ptr& request,
const ITaskExecutor::Ptr& taskExecutor,
const ITaskExecutor::Ptr& callbackExecutor) :
_syncRequest {request},
_requestExecutor {taskExecutor},
_callbackExecutor {callbackExecutor},
_pipeline {{taskExecutor, [this] {_syncRequest->InferImpl();}}},
_syncPipeline {{std::make_shared<ImmediateExecutor>(), [this] {_syncRequest->InferImpl();}}} {
const ITaskExecutor::Ptr& callbackExecutor)
: _syncRequest{request},
_requestExecutor{taskExecutor},
_callbackExecutor{callbackExecutor},
_pipeline{{taskExecutor,
[this] {
_syncRequest->InferImpl();
}}},
_syncPipeline{{std::make_shared<ImmediateExecutor>(), [this] {
_syncRequest->InferImpl();
}}} {
auto streamsExecutor = std::dynamic_pointer_cast<IStreamsExecutor>(taskExecutor);
if (streamsExecutor != nullptr) {
_syncPipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streamsExecutor)), [this] {_syncRequest->InferImpl();}}};
_syncPipeline = {{std::make_shared<ImmediateStreamsExecutor>(std::move(streamsExecutor)), [this] {
_syncRequest->InferImpl();
}}};
}
}

@ -166,16 +178,15 @@ public:
*/
StatusCode Wait(int64_t millis_timeout) override {
if (millis_timeout < InferRequest::WaitMode::RESULT_READY) {
IE_THROW(ParameterMismatch)
<< " Timeout can't be less "
<< InferRequest::WaitMode::RESULT_READY << " for InferRequest::Wait\n";
IE_THROW(ParameterMismatch) << " Timeout can't be less " << InferRequest::WaitMode::RESULT_READY
<< " for InferRequest::Wait\n";
}
auto status = std::future_status::deferred;

// Just use the last '_futures' member to wait pipeline completion
auto future = [&] {
std::lock_guard<std::mutex> lock {_mutex};
return _futures.empty() ? std::shared_future<void> {} : _futures.back();
std::lock_guard<std::mutex> lock{_mutex};
return _futures.empty() ? std::shared_future<void>{} : _futures.back();
}();

if (!future.valid()) {
@ -188,10 +199,10 @@ public:
status = std::future_status::ready;
} break;
case InferRequest::WaitMode::STATUS_ONLY: {
status = future.wait_for(std::chrono::milliseconds {0});
status = future.wait_for(std::chrono::milliseconds{0});
} break;
default: {
status = future.wait_for(std::chrono::milliseconds {millis_timeout});
status = future.wait_for(std::chrono::milliseconds{millis_timeout});
} break;
}

@ -204,12 +215,16 @@ public:
}

void StartAsync() override {
InferImpl([&] {StartAsync_ThreadUnsafe();});
InferImpl([&] {
StartAsync_ThreadUnsafe();
});
}

void Infer() override {
DisableCallbackGuard disableCallbackGuard{this};
InferImpl([&] {Infer_ThreadUnsafe();});
InferImpl([&] {
Infer_ThreadUnsafe();
});
Wait(InferRequest::WaitMode::RESULT_READY);
}

@ -284,7 +299,8 @@ protected:
* @param[in] itEndStage End pipeline iterator
* @param[in] callbackExecutor Final or error stage executor
*/
void RunFirstStage(const Pipeline::iterator itBeginStage, const Pipeline::iterator itEndStage,
void RunFirstStage(const Pipeline::iterator itBeginStage,
const Pipeline::iterator itEndStage,
const ITaskExecutor::Ptr callbackExecutor = {}) {
auto& firstStageExecutor = std::get<Stage_e::executor>(*itBeginStage);
IE_ASSERT(nullptr != firstStageExecutor);
@ -317,11 +333,10 @@ protected:
}
}

ITaskExecutor::Ptr _requestExecutor; //!< Used to run inference CPU tasks.
ITaskExecutor::Ptr _callbackExecutor; //!< Used to run post inference callback in asynchronous pipline
ITaskExecutor::Ptr _requestExecutor; //!< Used to run inference CPU tasks.
ITaskExecutor::Ptr _callbackExecutor; //!< Used to run post inference callback in asynchronous pipline
ITaskExecutor::Ptr _syncCallbackExecutor; //!< Used to run post inference callback in synchronous pipline
Pipeline _pipeline; //!< Pipeline variable that should be filled by inherited class.
Pipeline _pipeline; //!< Pipeline variable that should be filled by inherited class.
Pipeline _syncPipeline; //!< Synchronous pipeline variable that should be filled by inherited class.

/**
@ -360,57 +375,60 @@ private:
* @param[in] callbackExecutor Executor that will run final stage with callback call
* @return A next stage task
*/
Task MakeNextStageTask(const Pipeline::iterator itStage, const Pipeline::iterator itEndStage,
Task MakeNextStageTask(const Pipeline::iterator itStage,
const Pipeline::iterator itEndStage,
const ITaskExecutor::Ptr callbackExecutor) {
return std::bind([this, itStage, itEndStage](ITaskExecutor::Ptr& callbackExecutor) mutable {
std::exception_ptr currentException = nullptr;
auto& thisStage = *itStage;
auto itNextStage = itStage + 1;
try {
auto& stageTask = std::get<Stage_e::task>(thisStage);
IE_ASSERT(nullptr != stageTask);
stageTask();
if (itEndStage != itNextStage) {
auto& nextStage = *itNextStage;
auto& nextStageExecutor = std::get<Stage_e::executor>(nextStage);
IE_ASSERT(nullptr != nextStageExecutor);
nextStageExecutor->run(MakeNextStageTask(itNextStage, itEndStage, std::move(callbackExecutor)));
}
} catch (...) {
currentException = std::current_exception();
}

if ((itEndStage == itNextStage) || (nullptr != currentException)) {
auto lastStageTask = [this, currentException]() mutable {
auto promise = std::move(_promise);
Callback callback;
{
std::lock_guard<std::mutex> lock{_mutex};
_state = InferState::Idle;
callback = _callback;
return std::bind(
[this, itStage, itEndStage](ITaskExecutor::Ptr& callbackExecutor) mutable {
std::exception_ptr currentException = nullptr;
auto& thisStage = *itStage;
auto itNextStage = itStage + 1;
try {
auto& stageTask = std::get<Stage_e::task>(thisStage);
IE_ASSERT(nullptr != stageTask);
stageTask();
if (itEndStage != itNextStage) {
auto& nextStage = *itNextStage;
auto& nextStageExecutor = std::get<Stage_e::executor>(nextStage);
IE_ASSERT(nullptr != nextStageExecutor);
nextStageExecutor->run(MakeNextStageTask(itNextStage, itEndStage, std::move(callbackExecutor)));
}
if (callback) {
try {
auto local_callback = std::move(callback);
local_callback(currentException);
} catch (...) {
currentException = std::current_exception();
} catch (...) {
currentException = std::current_exception();
}

if ((itEndStage == itNextStage) || (nullptr != currentException)) {
auto lastStageTask = [this, currentException]() mutable {
auto promise = std::move(_promise);
Callback callback;
{
std::lock_guard<std::mutex> lock{_mutex};
_state = InferState::Idle;
callback = _callback;
}
}
if (nullptr == currentException) {
promise.set_value();
} else {
promise.set_exception(currentException);
}
};
if (callback) {
try {
auto local_callback = std::move(callback);
local_callback(currentException);
} catch (...) {
currentException = std::current_exception();
}
}
if (nullptr == currentException) {
promise.set_value();
} else {
promise.set_exception(currentException);
}
};

if (nullptr == callbackExecutor) {
lastStageTask();
} else {
callbackExecutor->run(std::move(lastStageTask));
if (nullptr == callbackExecutor) {
lastStageTask();
} else {
callbackExecutor->run(std::move(lastStageTask));
}
}
}
}, std::move(callbackExecutor));
},
std::move(callbackExecutor));
}

std::promise<void> _promise;
@ -9,11 +9,11 @@
#include <string>
#include <vector>

#include <ie_parameter.hpp>
#include <ie_remote_context.hpp>
#include <cpp/ie_cnn_network.h>
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
#include <details/ie_so_pointer.hpp>
#include "cpp/ie_cnn_network.h"
#include "cpp_interfaces/interface/ie_ivariable_state_internal.hpp"
#include "details/ie_so_pointer.hpp"
#include "ie_parameter.hpp"
#include "ie_remote_context.hpp"

namespace InferenceEngine {

@ -27,7 +27,8 @@ class IVariableStateInternal;
* @brief An internal API of executable network to be implemented by plugin,
* @ingroup ie_dev_api_exec_network_api
*/
class INFERENCE_ENGINE_API_CLASS(IExecutableNetworkInternal) : public std::enable_shared_from_this<IExecutableNetworkInternal> {
class INFERENCE_ENGINE_API_CLASS(IExecutableNetworkInternal)
: public std::enable_shared_from_this<IExecutableNetworkInternal> {
public:
/**
* @brief A shared pointer to IExecutableNetworkInternal interface
@ -140,7 +141,7 @@ protected:
virtual std::shared_ptr<IInferRequestInternal> CreateInferRequestImpl(InputsDataMap networkInputs,
OutputsDataMap networkOutputs);

InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data

/**
@ -4,16 +4,16 @@

#pragma once

#include <ie_blob.h>
#include <ie_common.h>
#include <ie_preprocess_data.hpp>
#include <ie_input_info.hpp>
#include <cpp/ie_infer_request.hpp>

#include <map>
#include <memory>
#include <string>

#include "cpp/ie_infer_request.hpp"
#include "ie_blob.h"
#include "ie_common.h"
#include "ie_input_info.hpp"
#include "ie_preprocess_data.hpp"

namespace InferenceEngine {

class IExecutableNetworkInternal;
@ -89,7 +89,8 @@ public:
/**
* @brief Sets pre-process for input data
* @param name Name of input blob.
* @param data - a reference to input or output blob. The type of Blob must correspond to the network input precision and size.
* @param data - a reference to input or output blob. The type of Blob must correspond to the network input
* precision and size.
* @param info Preprocess info for blob.
*/
virtual void SetBlob(const std::string& name, const Blob::Ptr& data, const PreProcessInfo& info);
@ -219,17 +220,19 @@ protected:
* @param deviceBlob Blob object in plugin's desired format
* @return `True` if pre-processing is required, `false` otherwise
*/
bool preProcessingRequired(const InputInfo::Ptr& info, const Blob::Ptr& userBlob, const Blob::Ptr& deviceBlob = nullptr);
bool preProcessingRequired(const InputInfo::Ptr& info,
const Blob::Ptr& userBlob,
const Blob::Ptr& deviceBlob = nullptr);

void addInputPreProcessingFor(const std::string& name, Blob::Ptr const& from, const Blob::Ptr& to);

InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
InferenceEngine::InputsDataMap _networkInputs; //!< Holds information about network inputs info
InferenceEngine::OutputsDataMap _networkOutputs; //!< Holds information about network outputs data
InferenceEngine::BlobMap _inputs; //!< A map of user passed blobs for network inputs
InferenceEngine::BlobMap _deviceInputs; //!< A map of actual network inputs, in plugin specific format
InferenceEngine::BlobMap _outputs; //!< A map of user passed blobs for network outputs
std::map<std::string, PreProcessDataPtr> _preProcData; //!< A map of pre-process data per input
int m_curBatch = -1; //!< Current batch value used in dynamic batching
InferenceEngine::BlobMap _inputs; //!< A map of user passed blobs for network inputs
InferenceEngine::BlobMap _deviceInputs; //!< A map of actual network inputs, in plugin specific format
InferenceEngine::BlobMap _outputs; //!< A map of user passed blobs for network outputs
std::map<std::string, PreProcessDataPtr> _preProcData; //!< A map of pre-process data per input
int m_curBatch = -1; //!< Current batch value used in dynamic batching

/**
* @brief A shared pointer to IInferRequestInternal
@ -239,7 +242,7 @@ protected:
Callback _callback; //!< A callback

private:
void* _userData = nullptr;
void* _userData = nullptr;
};

/**