Implemented inference in template plugin (#1308)

* Implemented inference in template plugin

* Fixed tests

* Removed thirdparty dependency

* Simplified executor configuration

* Removed half

* Fixed cmake

* Fixed ngraph node check

* Device blob allocation

* Fixed enum error
Anton Pankratv
2020-07-28 17:25:31 +03:00
committed by GitHub
parent 2a96917e2a
commit 18836f53cd
18 changed files with 668 additions and 258 deletions

View File

@@ -45,14 +45,11 @@ namespace TemplateConfigParams {
#define DECLARE_TEMPLATE_CONFIG_KEY(name) DECLARE_CONFIG_KEY(TEMPLATE_##name)
#define DECLARE_TEMPLATE_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(TEMPLATE_##name)
/**
* @brief The key to define the type of transformations for TEMPLATE inputs and outputs.
* TEMPLATE uses a custom data layout for input and output blobs. The IE TEMPLATE Plugin provides a custom
* optimized version of the transformation functions that does not use OpenMP and is much faster
* than the native TEMPLATE functions. Values: "NO" - optimized plugin transformations
* are used, "YES" - native TEMPLATE transformations are used.
* @brief Defines the number of throughput streams used by the TEMPLATE plugin.
*/
DECLARE_TEMPLATE_CONFIG_KEY(ANY_CONFIG_KEY);
DECLARE_TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS);
} // namespace TemplateConfigParams
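
From the application side this key is passed through the regular Inference Engine configuration map. The snippet below is a minimal usage sketch, not part of this change; it assumes the plugin is registered under the "TEMPLATE" device name and uses "model.xml" as a placeholder model path.

#include <inference_engine.hpp>
#include <template/template_config.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // hypothetical model path
    // Request two throughput streams from the TEMPLATE device; internally the key is
    // remapped onto the streams executor configuration (CPU_THROUGHPUT_STREAMS).
    auto executableNetwork = core.LoadNetwork(network, "TEMPLATE",
        {{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "2"}});
    return 0;
}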

View File

@@ -20,14 +20,25 @@ ie_add_plugin(NAME ${TARGET_NAME}
VERSION_DEFINES_FOR template_plugin.cpp)
target_include_directories(${TARGET_NAME} PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(${TARGET_NAME} PRIVATE
"${IE_MAIN_TEMPLATE_PLUGIN_SOURCE_DIR}/include")
target_link_libraries(${TARGET_NAME} PRIVATE IE::inference_engine IE::inference_engine_transformations ${NGRAPH_LIBRARIES} ${INTEL_ITT_LIBS})
target_link_libraries(${TARGET_NAME} PRIVATE
IE::inference_engine
IE::inference_engine_transformations
${INTEL_ITT_LIBS}
${NGRAPH_LIBRARIES})
# Link inference backend library to plugin. Here we use ngraph interpreter_backend as example
target_link_libraries(${TARGET_NAME} PRIVATE
ngraph_backend
interpreter_backend)
# ATTENTION: uncomment to register a plugin in the plugins.xml file
# ie_register_plugins(MAIN_TARGET ${TARGET_NAME}
# POSSIBLE_PLUGINS ${TARGET_NAME})
# POSSIBLE_PLUGINS ${TARGET_NAME})
# [cmake:plugin]
# ATTENTION: uncomment to install component

View File

@@ -19,21 +19,28 @@ TemplateAsyncInferRequest::TemplateAsyncInferRequest(
const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) :
AsyncInferRequestThreadSafeDefault(inferRequest, cpuTaskExecutor, callbackExecutor),
_inferRequest(inferRequest), _waitExecutor(waitExecutor) {
_pipeline = {
{cpuTaskExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
_inferRequest->inferPreprocess();
_inferRequest->startPipeline();
}},
{_waitExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(WaitPipeline)
_inferRequest->waitPipeline();
}},
{cpuTaskExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(Postprocessing)
_inferRequest->inferPostprocess();
}}
};
constexpr const auto remoteDevice = false;
// By default, a single-stage pipeline is created.
// This stage executes InferRequest::Infer() using cpuTaskExecutor.
// But if a remote asynchronous device is used, the pipeline can be split into tasks executed by cpuTaskExecutor
// and waiting tasks. Waiting tasks can block the execution thread, so they run on threads from a separate executor.
if (remoteDevice) {
_pipeline = {
{cpuTaskExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(PreprocessingAndStartPipeline)
_inferRequest->inferPreprocess();
_inferRequest->startPipeline();
}},
{_waitExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(WaitPipeline)
_inferRequest->waitPipeline();
}},
{cpuTaskExecutor, [this] {
IE_PROFILING_AUTO_SCOPE(Postprocessing)
_inferRequest->inferPostprocess();
}}
};
}
}
// ! [async_infer_request:ctor]
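
Conceptually, each pipeline element is a pair of a task executor and a task body: the synchronous Infer() path simply runs the bodies in order, while the asynchronous path submits each body to its executor and chains the next stage from inside the completed task. The helper below is a simplified sketch of that idea, not the actual AsyncInferRequestThreadSafeDefault implementation.

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>
#include <threading/ie_itask_executor.hpp>

using Stage = std::pair<InferenceEngine::ITaskExecutor::Ptr, InferenceEngine::Task>;

// Hypothetical helper (not the plugin's code): run stage `index` on its executor,
// then schedule the next stage from inside the completed task.
void RunPipelineFrom(std::shared_ptr<std::vector<Stage>> pipeline, std::size_t index = 0) {
    if (index >= pipeline->size()) return;
    auto executor = (*pipeline)[index].first;
    executor->run([pipeline, index] {
        (*pipeline)[index].second();           // execute the stage body
        RunPipelineFrom(pipeline, index + 1);  // chain the next stage
    });
}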

View File

@@ -9,10 +9,12 @@
#include <ie_util_internal.hpp>
#include <ie_plugin_config.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <file_utils.h>
#include <cpp_interfaces/exception2status.hpp>
#include "template_config.hpp"
#include "template/template_config.hpp"
using namespace TemplatePlugin;
@@ -20,12 +22,22 @@ Configuration::Configuration() { }
Configuration::Configuration(const ConfigMap& config, const Configuration & defaultCfg, bool throwOnUnsupported) {
*this = defaultCfg;
// If plugin needs to use InferenceEngine::StreamsExecutor it should be able to process its configuration
auto streamExecutorConfigKeys = _streamsExecutorConfig.SupportedKeys();
for (auto&& c : config) {
const auto& key = c.first;
const auto& value = c.second;
if (CONFIG_KEY(DEVICE_ID) == key) {
if (TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) == key) {
_streamsExecutorConfig.SetConfig(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value);
} else if (streamExecutorConfigKeys.end() !=
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
_streamsExecutorConfig.SetConfig(key, value);
} else if (CONFIG_KEY(DEVICE_ID) == key) {
deviceId = std::stoi(value);
if (deviceId > 0) {
THROW_IE_EXCEPTION << "Device ID " << deviceId << " is not supported";
}
} else if (CONFIG_KEY(PERF_COUNT) == key) {
perfCount = (CONFIG_VALUE(YES) == value);
} else if (throwOnUnsupported) {
@@ -39,6 +51,14 @@ InferenceEngine::Parameter Configuration::Get(const std::string& name) const {
return {std::to_string(deviceId)};
} else if (name == CONFIG_KEY(PERF_COUNT)) {
return {perfCount};
} else if (name == TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) || name == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) {
return {std::to_string(_streamsExecutorConfig._streams)};
} else if (name == CONFIG_KEY(CPU_BIND_THREAD)) {
return const_cast<InferenceEngine::IStreamsExecutor::Config&>(_streamsExecutorConfig).GetConfig(name);
} else if (name == CONFIG_KEY(CPU_THREADS_NUM)) {
return {std::to_string(_streamsExecutorConfig._threads)};
} else if (name == CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM)) {
return {std::to_string(_streamsExecutorConfig._threadsPerStream)};
} else {
THROW_IE_EXCEPTION << NOT_FOUND_str << ": " << name;
}
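
The same key mapping can be exercised through the Core configuration API. The following is a hedged sketch, not part of this change; it assumes the device is registered as "TEMPLATE" and that Plugin::GetConfig delegates to Configuration::Get as in the rest of the plugin.

InferenceEngine::Core core;
// Set the number of streams for the device...
core.SetConfig({{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "4"}}, "TEMPLATE");
// ...and read it back; Configuration::Get() serves the value from _streamsExecutorConfig.
auto streams = core.GetConfig("TEMPLATE", TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)).as<std::string>();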

View File

@@ -11,6 +11,8 @@
#include <ie_parameter.hpp>
#include <threading/ie_istreams_executor.hpp>
namespace TemplatePlugin {
template<typename T>
@@ -34,6 +36,7 @@ struct Configuration {
int deviceId = 0;
bool perfCount = true;
InferenceEngine::IStreamsExecutor::Config _streamsExecutorConfig;
};
// ! [configuration:header]

View File

@@ -16,40 +16,31 @@
#include <threading/ie_executor_manager.hpp>
#include <details/ie_cnn_network_tools.h>
#include <ngraph/ngraph.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include "template/template_config.hpp"
#include "template_plugin.hpp"
#include "template_executable_network.hpp"
#include "template_pattern_transformation.hpp"
using namespace TemplatePlugin;
// ! [executable_network:ctor_cnnnetwork]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
const Configuration& cfg):
_name(network.getName()),
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(const std::shared_ptr<ngraph::Function>& function,
const Configuration& cfg,
const Plugin::Ptr& plugin) :
InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, nullptr), // Disable default threads creation
_cfg(cfg),
_waitExecutor(InferenceEngine::ExecutorManager::getInstance()->getExecutor("Template")) {
_plugin(plugin),
_function(function) {
// TODO: if your plugin supports device ID (more than a single instance of the device can be on the host machine)
// you should select the proper device based on KEY_DEVICE_ID or automatic behavior
// In this case, _waitExecutor should also be created per device.
try {
if (std::shared_ptr<const ngraph::Function> ngraphFunction = network.getFunction()) {
CompileGraph(ngraphFunction);
} else {
THROW_IE_EXCEPTION << "TEMPLATE plugin can compile only IR v10 networks";
}
}
catch (const InferenceEngineException & e) {
throw e;
}
catch (const std::exception & e) {
CompileGraph();
InitExecutor();
} catch (const InferenceEngineException&) {
throw;
} catch (const std::exception & e) {
THROW_IE_EXCEPTION << "Standard exception from compilation library: " << e.what();
}
catch (...) {
} catch (...) {
THROW_IE_EXCEPTION << "Generic exception is thrown";
}
}
@@ -57,53 +48,53 @@ TemplatePlugin::ExecutableNetwork::ExecutableNetwork(InferenceEngine::ICNNNetwor
// ! [executable_network:ctor_import_stream]
TemplatePlugin::ExecutableNetwork::ExecutableNetwork(std::istream & model,
const Configuration& cfg) :
_cfg(cfg) {
const Configuration& cfg,
const Plugin::Ptr& plugin) :
_cfg(cfg),
_plugin(plugin) {
// TODO: since Import network is not a mandatory functionality, this ctor can just be removed
}
// ! [executable_network:ctor_import_stream]
// ! [executable_network:compile_graph]
void TemplatePlugin::ExecutableNetwork::CompileGraph(const std::shared_ptr<const ngraph::Function> & ngraphFunction) {
void TemplatePlugin::ExecutableNetwork::CompileGraph() {
// TODO: perform actual graph compilation taking `_cfg` into account
// 1. Copy the ngraph::Function first to apply some transformations later in
// ExecutableNetwork::CompileGraph, which modify the original ngraph::Function
const bool shareConsts = false, constFolding = false;
std::vector<::ngraph::element::Type> new_types;
std::vector<::ngraph::PartialShape> new_shapes;
for (const auto &parameter : ngraphFunction->get_parameters()) {
new_shapes.emplace_back(parameter->get_partial_shape());
new_types.emplace_back(parameter->get_element_type());
// Generate backend-specific blob mappings. For example, Inference Engine does not use ngraph::Result nodes' friendly names
// as inference request output names, but rather the name of the layer before them.
for (auto&& result : _function->get_results()) {
auto previousOutput = result->get_input_source_output(0);
auto outputName = previousOutput.get_node()->get_friendly_name();
if (previousOutput.get_node()->get_output_size() > 1) {
outputName += '.' + std::to_string(previousOutput.get_index());
}
_outputIndex.emplace(outputName, _function->get_result_index(result));
}
for (auto&& parameter : _function->get_parameters()) {
_inputIndex.emplace(parameter->get_friendly_name(), _function->get_parameter_index(parameter));
}
auto copyFunction = ngraph::specialize_function(std::const_pointer_cast<ngraph::Function>(ngraphFunction),
new_types, new_shapes, std::vector<void *>(new_types.size(), nullptr), constFolding, shareConsts);
// 2. Perform common optimizations and device-specific transformations
ngraph::pass::Manager passManager;
// Example: register CommonOptimizations transformation from transformations library
passManager.register_pass<ngraph::pass::CommonOptimizations>();
// Example: register plugin specific transformation
passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
// Register any other transformations
// ..
// After `run_passes`, we have the transformed function, where operations match device operations,
// and we can create device hardware-dependent graph
passManager.run_passes(copyFunction);
// 3. Iterate over operations and create hardware-specific ngraph
for (const auto& op : copyFunction->get_ordered_ops()) {
// TODO: map ngraph `op` to device operation
}
// 4. Perform any other steps like allocation and filling device buffers, and so on
// Perform any other steps like allocation and filling device buffers, and so on
}
// ! [executable_network:compile_graph]
// ! [executable_network:init_executor]
void TemplatePlugin::ExecutableNetwork::InitExecutor() {
// The default multi-threaded configuration is balanced for throughput and latency cases and takes into account
// real hardware cores and NUMA nodes.
auto streamsExecutorConfig = InferenceEngine::IStreamsExecutor::Config::MakeDefaultMultiThreaded(_cfg._streamsExecutorConfig);
streamsExecutorConfig._name = "TemplateStreamsExecutor";
// As the Inference Engine CPU Streams Executor creates some additional threads,
// it is better to avoid thread re-creation because some OS memory allocators cannot manage such usage scenarios
// and memory consumption can be larger than expected.
// That is why the Inference Engine provides an executors cache.
_taskExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(streamsExecutorConfig);
// NOTE: the callback executor is not configured, so callbacks will be called in the thread of the last stage of the inference request pipeline
// _callbackExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateCallbackExecutor"});
}
// ! [executable_network:init_executor]
// ! [executable_network:create_infer_request_impl]
InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
@@ -115,7 +106,7 @@ InferenceEngine::InferRequestInternal::Ptr TemplatePlugin::ExecutableNetwork::Cr
void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& asyncRequest) {
auto internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs);
auto asyncThreadSafeImpl = std::make_shared<TemplateAsyncInferRequest>(std::static_pointer_cast<TemplateInferRequest>(internalRequest),
_taskExecutor, _waitExecutor, _callbackExecutor);
_taskExecutor, _plugin->_waitExecutor, _callbackExecutor);
asyncRequest.reset(new InferenceEngine::InferRequestBase<TemplateAsyncInferRequest>(asyncThreadSafeImpl),
[](InferenceEngine::IInferRequest *p) { p->Release(); });
asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest);
@@ -124,13 +115,7 @@ void TemplatePlugin::ExecutableNetwork::CreateInferRequest(IInferRequest::Ptr& a
// ! [executable_network:get_config]
void TemplatePlugin::ExecutableNetwork::GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const {
// TODO: return more supported values for config keys
if (name == CONFIG_KEY(DEVICE_ID) ||
name == CONFIG_KEY(PERF_COUNT)) {
result = _cfg.Get(name);
} else {
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name;
}
result = _cfg.Get(name);
}
// ! [executable_network:get_config]
@@ -144,14 +129,20 @@ void TemplatePlugin::ExecutableNetwork::GetMetric(const std::string &name, Infer
METRIC_KEY(SUPPORTED_CONFIG_KEYS),
METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)});
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, std::vector<std::string>{
std::vector<std::string> configKeys = {
CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(PERF_COUNT)});
CONFIG_KEY(PERF_COUNT),
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS) };
auto streamExecutorConfigKeys = IStreamsExecutor::Config{}.SupportedKeys();
for (auto&& configKey : streamExecutorConfigKeys) {
configKeys.emplace_back(configKey);
}
result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (METRIC_KEY(NETWORK_NAME) == name) {
result = IE_SET_METRIC(NETWORK_NAME, _name);
auto networkName = _function->get_friendly_name();
result = IE_SET_METRIC(NETWORK_NAME, networkName);
} else if (METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS) == name) {
// TODO: fill with actual number
unsigned int value = 1;
unsigned int value = _cfg._streamsExecutorConfig._streams;
result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, value);
} else {
THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork metric: " << name;

View File

@@ -27,7 +27,7 @@
namespace TemplatePlugin {
class Engine;
class Plugin;
/**
* @class ExecutableNetwork
@@ -36,11 +36,13 @@ class Engine;
// ! [executable_network:header]
class ExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault {
public:
ExecutableNetwork(InferenceEngine::ICNNNetwork& network,
const Configuration& cfg);
ExecutableNetwork(const std::shared_ptr<ngraph::Function>& function,
const Configuration& cfg,
const std::shared_ptr<Plugin>& plugin);
ExecutableNetwork(std::istream & model,
const Configuration& cfg);
ExecutableNetwork(std::istream& model,
const Configuration& cfg,
const std::shared_ptr<Plugin>& plugin);
~ExecutableNetwork() override = default;
@@ -53,15 +55,18 @@ public:
void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override;
std::atomic<std::size_t> _requestId = {0};
std::string _name;
Configuration _cfg;
private:
void CompileGraph(const std::shared_ptr<const ngraph::Function> & ngraphFunction);
friend class TemplateInferRequest;
std::shared_ptr<Engine> _plugin;
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
void CompileGraph();
void InitExecutor();
std::atomic<std::size_t> _requestId = {0};
Configuration _cfg;
std::shared_ptr<Plugin> _plugin;
std::shared_ptr<ngraph::Function> _function;
std::map<std::string, std::size_t> _inputIndex;
std::map<std::string, std::size_t> _outputIndex;
};
// ! [executable_network:header]

View File

@@ -18,17 +18,16 @@
#include <ie_parallel.hpp>
#include <ie_memcpy.h>
#include <precision_utils.h>
#include <template/template_config.hpp>
#include "template/template_config.hpp"
#include "template_infer_request.hpp"
#include "template_executable_network.hpp"
#include "template_plugin.hpp"
using namespace TemplatePlugin;
using namespace InferenceEngine;
using Time = std::chrono::high_resolution_clock;
using ns = std::chrono::nanoseconds;
using fsec = std::chrono::duration<float>;
// ! [infer_request:ctor]
TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
@@ -38,10 +37,9 @@ TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap&
_executableNetwork(executableNetwork) {
// TODO: allocate infer request device and host buffers if needed, fill actual list of profiling tasks
auto requestID = std::to_string(_executableNetwork->_requestId);
_executableNetwork->_requestId++;
auto requestID = std::to_string(_executableNetwork->_requestId.fetch_add(1));
std::string name = _executableNetwork->_name + "_Req" + requestID;
std::string name = _executableNetwork->_function->get_friendly_name() + "_Req" + requestID;
_profilingTask = { {
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Preprocess") },
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_Postprocess") },
@@ -49,9 +47,12 @@ TemplateInferRequest::TemplateInferRequest(const InferenceEngine::InputsDataMap&
{ ProfilingTask("Template" + std::to_string(_executableNetwork->_cfg.deviceId) + "_" + name + "_WaitPipline") },
} };
_executable = _executableNetwork->_plugin->_backend->compile(_executableNetwork->_function);
_parameters = _executableNetwork->_function->get_parameters();
_results = _executableNetwork->_function->get_results();
allocateDeviceBuffers();
allocateInputBlobs();
allocateOutputBlobs();
allocateBlobs();
}
// ! [infer_request:ctor]
@@ -62,92 +63,66 @@ TemplateInferRequest::~TemplateInferRequest() {
// ! [infer_request:dtor]
void TemplateInferRequest::allocateDeviceBuffers() {
// TODO: allocate device buffers if Template device is a remote one
// Allocate plugin backend specific memory handles
_inputTensors.resize(_networkInputs.size());
_outputTensors.resize(_networkOutputs.size());
}
void TemplateInferRequest::allocateInputBlobs() {
for (auto &networkInput : _networkInputs) {
SizeVector dims = networkInput.second->getTensorDesc().getDims();
Precision precision = networkInput.second->getTensorDesc().getPrecision();
Layout input_layout = networkInput.second->getInputData()->getLayout();
Blob::Ptr inputBlob;
Blob::Ptr inputBlobNCHW;
template<typename BlobDataMap, typename GetNetworkPrecisionF>
static void AllocateImpl(const BlobDataMap& blobDataMap,
BlobMap& blobMap,
BlobMap& networkBlobMap,
GetNetworkPrecisionF&& GetNetworkPrecision) {
for (auto&& blobData : blobDataMap) {
auto& dims = blobData.second->getTensorDesc().getDims();
auto& precision = blobData.second->getTensorDesc().getPrecision();
auto layout = blobData.second->getTensorDesc().getLayout();
Blob::Ptr blob;
switch (precision) {
case Precision::FP32 :
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<float>({ precision, dims, input_layout });
if (input_layout == Layout::NHWC) {
inputBlobNCHW = InferenceEngine::make_shared_blob<float>({ precision, dims, Layout::NCHW });
}
break;
case Precision::FP16 :
case Precision::I16 :
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, input_layout });
if (input_layout == Layout::NHWC) {
inputBlobNCHW = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, Layout::NCHW });
}
break;
case Precision::U8 :
inputBlobNCHW = inputBlob = InferenceEngine::make_shared_blob<uint8_t>({ precision, dims, input_layout });
if (input_layout == Layout::NHWC) {
inputBlobNCHW = InferenceEngine::make_shared_blob<uint8_t>({ precision, dims, Layout::NCHW });
}
break;
default:
THROW_IE_EXCEPTION << "Unsupported network precision: " << precision
<< precision << "! Supported precisions are: FP32, FP16, I16, U8";
case Precision::U8: {
blob = InferenceEngine::make_shared_blob<std::uint8_t>({precision, dims, layout});
} break;
case Precision::FP32 : {
blob = InferenceEngine::make_shared_blob<float>({precision, dims, layout});
} break;
default: THROW_IE_EXCEPTION << "Template Plugin: Unsupported Input/Output Presision";
}
// allocate the input blob
inputBlob->allocate();
_inputs[networkInput.first] = inputBlob;
if (inputBlobNCHW != inputBlob) {
inputBlobNCHW->allocate();
blob->allocate();
blobMap[blobData.first] = blob;
auto networkPrecision = GetNetworkPrecision(blobData.first);
Blob::Ptr networkBlob;
switch (networkPrecision) {
case ngraph::element::Type_t::f32 : {
if (precision == Precision::FP32) {
networkBlob = blob;
} else {
networkBlob = InferenceEngine::make_shared_blob<float>({Precision::FP32, dims, layout});
}
} break;
default: THROW_IE_EXCEPTION << "Template Plugin: Unsupported network Input/Output Presision";
}
_inputsNCHW[networkInput.first] = inputBlobNCHW;
if (blob != networkBlob) {
networkBlob->allocate();
}
networkBlobMap[blobData.first] = networkBlob;
}
}
void TemplateInferRequest::allocateOutputBlobs() {
for (auto &networkOutput : _networkOutputs) {
SizeVector dims = networkOutput.second->getTensorDesc().getDims();
Precision precision = networkOutput.second->getPrecision();
Blob::Ptr outputBlob;
// allocate the output blob
Blob::Ptr outputBlobNCHW;
switch (precision) {
case Precision::FP32 :
outputBlobNCHW = outputBlob = InferenceEngine::make_shared_blob<float>({ precision, dims, networkOutput.second->getLayout() });
if (networkOutput.second->getLayout() == Layout::NHWC) {
outputBlobNCHW = InferenceEngine::make_shared_blob<float>({ precision, dims, Layout::NCHW });
}
break;
case Precision::FP16 :
outputBlobNCHW = outputBlob = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, networkOutput.second->getLayout() });
if (networkOutput.second->getLayout() == Layout::NHWC) {
outputBlobNCHW = InferenceEngine::make_shared_blob<int16_t>({ precision, dims, Layout::NCHW });
}
break;
default:
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported output precision: "
<< precision << "! Supported precisions are: FP32, FP16";
}
// allocate the output blob
outputBlob->allocate();
_outputs[networkOutput.first] = outputBlob;
if (outputBlobNCHW != outputBlob) {
outputBlobNCHW->allocate();
}
_outputsNCHW[networkOutput.first] = outputBlobNCHW;
}
if (_networkOutputs.empty() || _networkInputs.empty()) {
THROW_IE_EXCEPTION << "Internal error: no information about network's output/input";
}
void TemplateInferRequest::allocateBlobs() {
auto&& parameters = _executableNetwork->_function->get_parameters();
AllocateImpl(_networkInputs, _inputs, _networkInputBlobs, [&] (const std::string& blobName) {
return parameters.at(_executableNetwork->_inputIndex.at(blobName))->get_element_type();
});
auto&& results = _executableNetwork->_function->get_results();
AllocateImpl(_networkOutputs, _outputs, _networkOutputBlobs, [&] (const std::string& blobName) {
return results.at(_executableNetwork->_outputIndex.at(blobName))->get_element_type();
});
}
// ! [infer_request:infer_impl]
void TemplateInferRequest::InferImpl() {
// TODO: fill with actual list of pipeline stages, which are executed syncronously for sync infer requests
// TODO: fill with actual list of pipeline stages, which are executed synchronously for sync infer requests
inferPreprocess();
startPipeline();
waitPipeline();
@@ -155,50 +130,109 @@ void TemplateInferRequest::InferImpl() {
}
// ! [infer_request:infer_impl]
// ! [infer_request:infer_preprocess]
void TemplateInferRequest::inferPreprocess() {
auto prev = Time::now();
template<typename SrcT, typename DstT>
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
std::copy_n(InferenceEngine::as<InferenceEngine::MemoryBlob>(src)->rmap().as<const SrcT*>(),
src->size(),
InferenceEngine::as<InferenceEngine::MemoryBlob>(dst)->wmap().as<DstT*>());
}
// execute input pre-processing.
InferRequestInternal::execDataPreprocessing(_inputs);
for (auto &input : InferRequestInternal::_inputs) {
auto& src = input.second;
auto& dst = _inputsNCHW[input.first];
if (src != dst) {
if (src->getTensorDesc().getPrecision() == dst->getTensorDesc().getPrecision()
&& src->getTensorDesc().getDims() == dst->getTensorDesc().getDims()
&& src->getTensorDesc().getLayout() == dst->getTensorDesc().getLayout()) {
_inputsNCHW[input.first] = input.second;
} else { // Convert Layout to NCHW
InferenceEngine::blob_copy(src, dst);
static void blobCopy(const Blob::Ptr& src, const Blob::Ptr& dst) {
switch (src->getTensorDesc().getPrecision()) {
case Precision::U8 : {
switch (dst->getTensorDesc().getPrecision()) {
case Precision::U8 : break;
case Precision::FP32 : {
blobCopy<std::uint8_t, float>(src, dst);
} break;
default : {
THROW_IE_EXCEPTION << "Unsupported precision conversion from "
<< src->getTensorDesc().getPrecision() <<" to " << dst->getTensorDesc().getPrecision();
}
}
} break;
case Precision::FP32 : {
switch (dst->getTensorDesc().getPrecision()) {
case Precision::FP32 : break;
case Precision::U8 : {
blobCopy<float, std::uint8_t>(src, dst);
} break;
default : {
THROW_IE_EXCEPTION << "Unsupported precision conversion from "
<< src->getTensorDesc().getPrecision() <<" to " << dst->getTensorDesc().getPrecision();
}
}
} break;
default : {
THROW_IE_EXCEPTION << "Unsupported precision conversion from " << src->getTensorDesc().getPrecision();
}
}
}
// TODO: Preprocessing on inputs if needed: work _inputsNCHW
_inputPreprocessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
// ! [infer_request:infer_preprocess]
void TemplateInferRequest::inferPreprocess() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Preprocess]);
auto start = Time::now();
// NOTE: After the InferRequestInternal::execDataPreprocessing call,
// an input can point to a memory region other than the one allocated in the constructor.
InferRequestInternal::execDataPreprocessing(_inputs);
for (auto&& input : _inputs) {
auto inputBlob = input.second;
auto networkInput = _networkInputBlobs[input.first];
if (inputBlob->getTensorDesc().getPrecision() == networkInput->getTensorDesc().getPrecision()) {
networkInput = inputBlob;
} else {
blobCopy(inputBlob, networkInput);
}
auto index = _executableNetwork->_inputIndex[input.first];
const auto& parameter = _parameters[index];
const auto& parameterShape = parameter->get_shape();
const auto& parameterType = parameter->get_element_type();
_inputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(parameterType, parameterShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkInput)->rmap().as<void*>());
}
for (auto&& output : _outputs) {
auto outputBlob = output.second;
auto networkOutput = _networkOutputBlobs[output.first];
auto index = _executableNetwork->_outputIndex[output.first];
if (outputBlob->getTensorDesc().getPrecision() == networkOutput->getTensorDesc().getPrecision()) {
networkOutput = outputBlob;
}
const auto& result = _results[index];
const auto& resultShape = result->get_shape();
const auto& resultType = result->get_element_type();
_outputTensors[index] = _executableNetwork->_plugin->_backend->create_tensor(resultType, resultShape,
InferenceEngine::as<InferenceEngine::MemoryBlob>(networkOutput)->wmap().as<void*>());
}
_durations[Preprocess] = Time::now() - start;
}
// ! [infer_request:infer_preprocess]
void TemplateInferRequest::startPipeline() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[StartPipeline])
// TODO: Start pipeline and fill _inputTransferTime, _executeTime, _outputTransferTime
auto start = Time::now();
_executable->call(_outputTensors, _inputTensors);
_durations[StartPipeline] = Time::now() - start;
}
void TemplateInferRequest::waitPipeline() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[WaitPipeline])
auto prev = Time::now();
// TODO: Wait pipeline using driver API or other synronizations methods
_inputPreprocessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
auto start = Time::now();
// TODO: Wait for the pipeline using driver API or other synchronization methods
_durations[WaitPipeline] = Time::now() - start;
}
void TemplateInferRequest::inferPostprocess() {
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Postprocess])
auto prev = Time::now();
// TODO: perform post-processing and convert to NHWC layout
_outputPostProcessTime = static_cast<double>(std::chrono::duration_cast<ns>(Time::now() - prev).count());
IE_PROFILING_AUTO_SCOPE_TASK(_profilingTask[Postprocess]);
auto start = Time::now();
for (auto&& output : _outputs) {
auto outputBlob = output.second;
auto networkOutput = _networkOutputBlobs[output.first];
if (outputBlob->getTensorDesc().getPrecision() != networkOutput->getTensorDesc().getPrecision()) {
blobCopy(networkOutput, outputBlob);
}
}
_durations[Postprocess] = Time::now() - start;
}
// ! [infer_request:get_performance_counts]
@@ -206,18 +240,19 @@ void TemplateInferRequest::GetPerformanceCounts(std::map<std::string, InferenceE
InferenceEngineProfileInfo info;
info.execution_index = 0;
info.status = InferenceEngineProfileInfo::EXECUTED;
info.cpu_uSec = info.realTime_uSec = _inputPreprocessTime / 1000;
info.cpu_uSec = info.realTime_uSec = _durations[Preprocess].count();
perfMap["1. input preprocessing"] = info;
info.cpu_uSec = 0;
info.realTime_uSec = _inputTransferTime / 1000;
info.realTime_uSec = 0;
perfMap["2. input transfer to a device"] = info;
info.cpu_uSec = 0;
info.realTime_uSec = _executeTime / 1000;
info.status = InferenceEngineProfileInfo::EXECUTED;
info.cpu_uSec = info.realTime_uSec = _durations[StartPipeline].count();
perfMap["3. execution time"] = info;
info.cpu_uSec = 0;
info.realTime_uSec = _outputTransferTime / 1000;
info.realTime_uSec = 0;
perfMap["4. output transfer from a device"] = info;
info.cpu_uSec = info.realTime_uSec = _outputPostProcessTime / 1000;
info.cpu_uSec = info.realTime_uSec = _durations[Postprocess].count();
perfMap["5. output postprocessing"] = info;
}
// ! [infer_request:get_performance_counts]
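
The durations filled above surface to the application through the standard performance counters API. A hedged usage sketch (assuming the usual <inference_engine.hpp> and <iostream> includes, existing `core` and `network` objects, and the "TEMPLATE" device name; counters are only meaningful when PERF_COUNT is enabled):

auto executableNetwork = core.LoadNetwork(network, "TEMPLATE",
    {{CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES)}});
auto request = executableNetwork.CreateInferRequest();
request.Infer();
for (auto&& counter : request.GetPerformanceCounts()) {
    // e.g. "1. input preprocessing": realTime_uSec microseconds
    std::cout << counter.first << ": " << counter.second.realTime_uSec << " us" << std::endl;
}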

View File

@@ -17,8 +17,13 @@
#include <cpp_interfaces/impl/ie_executable_network_internal.hpp>
#include <threading/ie_itask_executor.hpp>
#include <ngraph/runtime/tensor.hpp>
#include <ngraph/runtime/tensor.hpp>
#include <executable.hpp>
#include "template_config.hpp"
namespace TemplatePlugin {
class ExecutableNetwork;
@@ -46,8 +51,7 @@ public:
private:
void allocateDeviceBuffers();
void allocateInputBlobs();
void allocateOutputBlobs();
void allocateBlobs();
enum {
Preprocess,
@@ -57,17 +61,18 @@ private:
numOfStages
};
std::array<InferenceEngine::ProfilingTask, numOfStages> _profilingTask;
std::array<InferenceEngine::ProfilingTask, numOfStages> _profilingTask;
// for performance counters
std::array<std::chrono::duration<float, std::micro>, numOfStages> _durations;
InferenceEngine::BlobMap _inputsNCHW;
InferenceEngine::BlobMap _outputsNCHW;
InferenceEngine::BlobMap _networkInputBlobs;
InferenceEngine::BlobMap _networkOutputBlobs;
ngraph::ParameterVector _parameters;
ngraph::ResultVector _results;
// for performance counts
double _inputPreprocessTime = 0.0;
double _inputTransferTime = 0.0;
double _executeTime = 0.0;
double _outputTransferTime = 0.0;
double _outputPostProcessTime = 0.0;
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _inputTensors;
std::vector<std::shared_ptr<ngraph::runtime::Tensor>> _outputTensors;
std::shared_ptr<ngraph::runtime::Executable> _executable;
};
// ! [infer_request:header]

View File

@@ -24,11 +24,17 @@
#include <ie_input_info.hpp>
#include <ie_layouts.h>
#include <hetero/hetero_plugin_config.hpp>
#include <template/template_config.hpp>
#include <backend.hpp>
#include <ngraph/specialize_function.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/opsets/opset.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include "template/template_config.hpp"
#include "template_plugin.hpp"
#include "template_executable_network.hpp"
#include "template_infer_request.hpp"
#include "template_pattern_transformation.hpp"
using namespace TemplatePlugin;
@@ -36,9 +42,61 @@ using namespace TemplatePlugin;
Plugin::Plugin() {
// TODO: fill with actual device name
_pluginName = "TEMPLATE";
ngraph::runtime::Backend::set_backend_shared_library_search_directory("");
_backend = ngraph::runtime::Backend::create("INTERPRETER");
_waitExecutor = ExecutorManager::getInstance()->getIdleCPUStreamsExecutor({"TemplateWaitExecutor"});
}
// ! [plugin:ctor]
// ! [plugin:dtor]
Plugin::~Plugin() {
// The plugin should remove executors from the executor cache to avoid growth of the thread count in the whole application
ExecutorManager::getInstance()->clear("TemplateStreamsExecutor");
ExecutorManager::getInstance()->clear("TemplateWaitExecutor");
// NOTE: Uncomment this if Inference Engine Executor cache is used to create callback executor
// ExecutorManager::getInstance()->clear("TemplateCallbackExecutor");
}
// ! [plugin:dtor]
// ! [plugin:transform]
std::shared_ptr<ngraph::Function> Plugin::Transform(const std::shared_ptr<const ngraph::Function>& function) {
// 1. Copy the ngraph::Function first to apply some transformations which modify the original ngraph::Function
const bool shareConsts = false, constFolding = false;
std::vector<::ngraph::element::Type> new_types;
std::vector<::ngraph::PartialShape> new_shapes;
for (const auto &parameter : function->get_parameters()) {
new_shapes.emplace_back(parameter->get_partial_shape());
new_types.emplace_back(parameter->get_element_type());
}
auto copyFunction = ngraph::specialize_function(std::const_pointer_cast<ngraph::Function>(function),
new_types, new_shapes, std::vector<void *>(new_types.size(), nullptr), constFolding, shareConsts);
copyFunction->set_friendly_name(function->get_friendly_name());
// 2. Perform common optimizations and device-specific transformations
ngraph::pass::Manager passManager;
// Example: register CommonOptimizations transformation from transformations library
passManager.register_pass<ngraph::pass::CommonOptimizations>();
// Example: register plugin specific transformation
passManager.register_pass<ngraph::pass::DecomposeDivideMatcher>();
passManager.register_pass<ngraph::pass::ReluReluFusionMatcher>();
// Register any other transformations
// ..
// After `run_passes`, we have the transformed function, where operations match device operations,
// and we can create device hardware-dependent graph
passManager.run_passes(copyFunction);
// 3. Iterate over operations and create hardware-specific ngraph
for (const auto& op : copyFunction->get_ordered_ops()) {
// TODO: map ngraph `op` to device operation
}
return copyFunction;
}
// ! [plugin:transform]
// ! [plugin:load_exe_network_impl]
InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork & network,
const ConfigMap &config) {
@@ -72,9 +130,12 @@ InferenceEngine::ExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const
}
}
auto clonedNetwork = cloneNet(network);
auto function = network.getFunction();
if (function == nullptr) {
THROW_IE_EXCEPTION << "TEMPLATE plugin can compile only IR v10 networks";
}
return std::make_shared<ExecutableNetwork>(*clonedNetwork, cfg);
return std::make_shared<ExecutableNetwork>(Transform(function), cfg, std::static_pointer_cast<Plugin>(shared_from_this()));
}
// ! [plugin:load_exe_network_impl]
@@ -90,7 +151,7 @@ InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model
auto cfg = Configuration(config, exportedCfg);
IExecutableNetwork::Ptr executableNetwork;
auto exec_network_impl = std::make_shared<ExecutableNetwork>(model, cfg);
auto exec_network_impl = std::make_shared<ExecutableNetwork>(model, cfg, std::static_pointer_cast<Plugin>(shared_from_this()));
executableNetwork.reset(new ExecutableNetworkBase<ExecutableNetworkInternal>(exec_network_impl),
[](InferenceEngine::details::IRelease *p) {p->Release(); });
@@ -101,19 +162,42 @@ InferenceEngine::ExecutableNetwork Plugin::ImportNetworkImpl(std::istream& model
// ! [plugin:query_network]
void Plugin::QueryNetwork(const ICNNNetwork &network, const ConfigMap& config, QueryNetworkResult &res) const {
Configuration cfg{config, _cfg, false};
res.rc = StatusCode::OK;
if (std::shared_ptr<const ngraph::Function> ngraphFunction = network.getFunction()) {
auto ops = ngraphFunction->get_ordered_ops();
for (auto&& op : ops) {
// TODO: investigate if an op is actually supported by Template device
bool supported = true;
if (supported) {
res.supportedLayersMap.insert({ op->get_friendly_name(), GetName() });
auto function = network.getFunction();
if (function == nullptr) {
THROW_IE_EXCEPTION << "Template Plugin supports only ngraph cnn network representation";
}
// First of all we should store initial input operation set
std::unordered_set<std::string> originalOps;
for (auto&& node : function->get_ops()) {
originalOps.emplace(node->get_friendly_name());
}
// It is needed to apply all transformations as it is done in LoadExeNetworkImpl
auto transformedFunction = Transform(function);
// The same original node can be transformed into both supported and unsupported backend nodes,
// so we need to keep both supported and unsupported node sets
std::unordered_set<std::string> supported;
std::unordered_set<std::string> unsupported;
auto opset = ngraph::get_opset4();
for (auto&& node : transformedFunction->get_ops()) {
if (!ngraph::op::is_constant(node) && !ngraph::op::is_parameter(node) && !ngraph::op::is_output(node)) {
// Extract transformation history from transformed node as list of nodes
for (auto&& fusedLayerName : ngraph::getFusedNamesVector(node)) {
// Filter just nodes from original operation set
if (contains(originalOps, fusedLayerName)) {
if (opset.contains_type_insensitive(fusedLayerName)) {
supported.emplace(fusedLayerName);
} else {
unsupported.emplace(fusedLayerName);
}
}
}
}
} else {
THROW_IE_EXCEPTION << "TEMPLATE plugin can query only IR v10 networks";
}
// The result set should contain only nodes from the supported set
for (auto&& layerName : supported) {
if (!contains(unsupported, layerName)) {
res.supportedLayersMap.emplace(layerName, GetName());
}
}
}
// ! [plugin:query_network]
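
A usage sketch for the query path (hypothetical application code, assuming the "TEMPLATE" device name and a placeholder model path):

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");  // hypothetical path
auto result = core.QueryNetwork(network, "TEMPLATE", {});
for (auto&& layer : result.supportedLayersMap) {
    std::cout << layer.first << " is supported by " << layer.second << std::endl;
}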
@@ -148,10 +232,17 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:
METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) };
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, supportedMetrics);
} else if (METRIC_KEY(SUPPORTED_CONFIG_KEYS) == name) {
std::vector<std::string> confiKeys = {
std::vector<std::string> configKeys = {
CONFIG_KEY(DEVICE_ID),
CONFIG_KEY(PERF_COUNT) };
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, confiKeys);
CONFIG_KEY(PERF_COUNT),
TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS)};
auto streamExecutorConfigKeys = IStreamsExecutor::Config{}.SupportedKeys();
for (auto&& configKey : streamExecutorConfigKeys) {
if (configKey != InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) {
configKeys.emplace_back(configKey);
}
}
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
} else if (METRIC_KEY(AVAILABLE_DEVICES) == name) {
// TODO: fill list of available devices
std::vector<std::string> availableDevices = { "" };
@@ -161,7 +252,7 @@ InferenceEngine::Parameter Plugin::GetMetric(const std::string& name, const std:
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, name);
} else if (METRIC_KEY(OPTIMIZATION_CAPABILITIES) == name) {
// TODO: fill actual list of supported capabilities: e.g. Template device supports only FP32
std::vector<std::string> capabilities = { METRIC_VALUE(FP32), TEMPLATE_METRIC_VALUE(HARDWARE_CONVOLUTION) };
std::vector<std::string> capabilities = { METRIC_VALUE(FP32) /*, TEMPLATE_METRIC_VALUE(HARDWARE_CONVOLUTION)*/ };
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else if (METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS) == name) {
// TODO: fill with actual values
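
These plugin-level properties can be inspected without loading a network. A short hedged sketch (assuming the plugin is registered as "TEMPLATE"):

InferenceEngine::Core core;
auto configKeys = core.GetMetric("TEMPLATE",
    METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
auto capabilities = core.GetMetric("TEMPLATE",
    METRIC_KEY(OPTIMIZATION_CAPABILITIES)).as<std::vector<std::string>>();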

View File

@@ -17,6 +17,8 @@
#include "template_executable_network.hpp"
#include "template_config.hpp"
#include "backend.hpp"
//! [plugin:header]
namespace TemplatePlugin {
@@ -25,7 +27,7 @@ public:
using Ptr = std::shared_ptr<Plugin>;
Plugin();
~Plugin() override = default;
~Plugin() override;
void SetConfig(const std::map<std::string, std::string> &config) override;
void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
@@ -40,7 +42,14 @@ public:
InferenceEngine::ExecutableNetwork ImportNetworkImpl(std::istream& model, const std::map<std::string, std::string>& config) override;
private:
Configuration _cfg;
friend class ExecutableNetwork;
friend class TemplateInferRequest;
static std::shared_ptr<ngraph::Function> Transform(const std::shared_ptr<const ngraph::Function>& function);
Configuration _cfg;
std::shared_ptr<ngraph::runtime::Backend> _backend;
InferenceEngine::ITaskExecutor::Ptr _waitExecutor;
};
} // namespace TemplatePlugin

View File

@@ -16,3 +16,5 @@ addIeTargetTest(
LABELS
TEMPLATE
)
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include)

View File

@@ -5,6 +5,7 @@
#include "multi-device/multi_device_config.hpp"
#include "behavior/config.hpp"
#include <template/template_config.hpp>
using namespace BehaviorTestsDefinitions;
namespace {
@@ -14,14 +15,20 @@ namespace {
};
const std::vector<std::map<std::string, std::string>> configs = {
{}
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}},
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_NUMA}},
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "8"}},
};
const std::vector<std::map<std::string, std::string>> inconfigs = {
{{TEMPLATE_CONFIG_KEY(THROUGHPUT_STREAMS), "OFF"}},
};
INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, IncorrectConfigTests,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values("TEMPLATE"),
::testing::ValuesIn(configs)),
::testing::ValuesIn(inconfigs)),
IncorrectConfigTests::getTestCaseName);
@@ -29,7 +36,7 @@ namespace {
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values("TEMPLATE"),
::testing::ValuesIn(configs)),
::testing::ValuesIn(inconfigs)),
IncorrectConfigAPITests::getTestCaseName);

View File

@@ -0,0 +1,115 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/convolution.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
/* ============= 2D Convolution ============= */
const std::vector<std::vector<size_t >> kernels = {{3, 3},
{3, 5}};
const std::vector<std::vector<size_t >> strides = {{1, 1},
{1, 3}};
const std::vector<std::vector<ptrdiff_t>> padBegins = {{0, 0},
{0, 3}};
const std::vector<std::vector<ptrdiff_t>> padEnds = {{0, 0},
{0, 3}};
const std::vector<std::vector<size_t >> dilations = {{1, 1},
{3, 1}};
const std::vector<size_t> numOutChannels = {1, 5};
const std::vector<ngraph::op::PadType> padTypes = {
ngraph::op::PadType::EXPLICIT,
ngraph::op::PadType::VALID
};
const auto conv2DParams_ExplicitPadding = ::testing::Combine(
::testing::ValuesIn(kernels),
::testing::ValuesIn(strides),
::testing::ValuesIn(padBegins),
::testing::ValuesIn(padEnds),
::testing::ValuesIn(dilations),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParams_AutoPadValid = ::testing::Combine(
::testing::ValuesIn(kernels),
::testing::ValuesIn(strides),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::ValuesIn(dilations),
::testing::ValuesIn(numOutChannels),
::testing::Values(ngraph::op::PadType::VALID)
);
INSTANTIATE_TEST_CASE_P(Convolution2D_ExplicitPadding, ConvolutionLayerTest,
::testing::Combine(
conv2DParams_ExplicitPadding,
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
::testing::Values("TEMPLATE")),
ConvolutionLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(Convolution2D_AutoPadValid, ConvolutionLayerTest,
::testing::Combine(
conv2DParams_AutoPadValid,
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t >({1, 3, 30, 30})),
::testing::Values("TEMPLATE")),
ConvolutionLayerTest::getTestCaseName);
/* ============= 3D Convolution ============= */
const std::vector<std::vector<size_t >> kernels3d = {{3, 3, 3},
{3, 5, 3}};
const std::vector<std::vector<ptrdiff_t>> paddings3d = {{0, 0, 0},
{0, 2, 0}};
const std::vector<std::vector<size_t >> strides3d = {{1, 1, 1},
{1, 2, 1}};
const std::vector<std::vector<size_t >> dilations3d = {{1, 1, 1},
{1, 2, 1}};
const auto conv3DParams_ExplicitPadding = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(paddings3d),
::testing::ValuesIn(paddings3d),
::testing::ValuesIn(dilations3d),
::testing::Values(5),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv3DParams_AutoPadValid = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
::testing::ValuesIn(dilations3d),
::testing::Values(5),
::testing::Values(ngraph::op::PadType::VALID)
);
INSTANTIATE_TEST_CASE_P(Convolution3D_ExplicitPadding, ConvolutionLayerTest,
::testing::Combine(
conv3DParams_ExplicitPadding,
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
::testing::Values("TEMPLATE")),
ConvolutionLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(Convolution3D_AutoPadValid, ConvolutionLayerTest,
::testing::Combine(
conv3DParams_AutoPadValid,
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t >({1, 3, 10, 10, 10})),
::testing::Values("TEMPLATE")),
ConvolutionLayerTest::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,36 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/reshape.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
INSTANTIATE_TEST_CASE_P(ReshapeCheckDynBatch, ReshapeLayerTest,
::testing::Combine(
::testing::Values(true),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values(std::vector<size_t>({30, 30, 30, 30})),
::testing::Values("TEMPLATE"),
::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(ReshapeCheck, ReshapeLayerTest,
::testing::Combine(
::testing::Values(true),
::testing::ValuesIn(netPrecisions),
::testing::Values(std::vector<size_t>({10, 10, 10, 10})),
::testing::Values(std::vector<size_t>({10, 0, 100})),
::testing::Values("TEMPLATE"),
::testing::Values(std::map<std::string, std::string>({}))),
ReshapeLayerTest::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,72 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/softmax.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
const std::vector<InferenceEngine::Layout> inputLayouts2D = {
InferenceEngine::Layout::NC,
};
const std::vector<InferenceEngine::SizeVector> inputShapes2D = {
InferenceEngine::SizeVector {1, 100},
InferenceEngine::SizeVector {100, 1},
InferenceEngine::SizeVector {10, 10},
};
const std::vector<size_t> axis2D = {
0, 1
};
const auto params2D = testing::Combine(
testing::ValuesIn(netPrecisions),
testing::ValuesIn(inputLayouts2D),
testing::ValuesIn(inputShapes2D),
testing::ValuesIn(axis2D),
testing::Values("TEMPLATE"),
testing::Values(std::map<std::string, std::string>())
);
INSTANTIATE_TEST_CASE_P(
SoftMax2D,
SoftMaxLayerTest,
params2D,
SoftMaxLayerTest::getTestCaseName
);
const std::vector<InferenceEngine::SizeVector> inputShapes4D = {
InferenceEngine::SizeVector {1, 100, 1, 1},
InferenceEngine::SizeVector {1, 3, 4, 3},
InferenceEngine::SizeVector {2, 3, 4, 5},
};
const std::vector<size_t> axis4D = {0, 1, 2, 3};
const auto params4D = testing::Combine(
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Layout::NCHW),
testing::ValuesIn(inputShapes4D),
testing::ValuesIn(axis4D),
testing::Values("TEMPLATE"),
testing::Values(std::map<std::string, std::string>())
);
INSTANTIATE_TEST_CASE_P(
SoftMax4D,
SoftMaxLayerTest,
params4D,
SoftMaxLayerTest::getTestCaseName
);
} // namespace

View File

@@ -9,5 +9,9 @@
std::vector<std::string> disabledTestPatterns() {
return {
".*ExclusiveAsyncRequests.*",
".*reusableCPUStreamsExecutor.*",
".*registerPlugin.*",
".*IEClassGetAvailableDevices.*"
};
}

View File

@@ -18,7 +18,7 @@
using namespace testing;
// ! [transformation:test]
TEST(TransformationTests, TemplateTest) {
TEST(TransformationTests, DISABLED_TemplateTest) {
std::shared_ptr<ngraph::Function> f, f_ref;
// f - ngraph::Function for applying transformation
// f_ref - ngraph::Function that is expected after applying transformation