[IE CLDNN] NGraph integration into cldnn plugin (#2506)

Co-authored-by: Roman Lyamin <roman.lyamin@intel.com>
Co-authored-by: Mikhail Letavin <mikhail.letavin@intel.com>
This commit is contained in:
Vladimir Paramuzov 2020-12-23 13:35:44 +03:00 committed by GitHub
parent bd9bbe09c3
commit 241b0faea1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
159 changed files with 8828 additions and 9773 deletions

View File

@ -11,7 +11,7 @@ if (LINUX)
endif()
endif()
file(GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB_RECURSE MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h)
addVersionDefines(cldnn_engine.cpp CI_BUILD_NUMBER CLDNN_VERSION)
@ -22,9 +22,10 @@ ie_add_plugin(NAME ${TARGET_NAME}
VERSION_DEFINES_FOR cldnn_engine.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE clDNN_lib pugixml
inference_engine inference_engine_legacy
inference_engine
inference_engine_transformations
inference_engine_lp_transformations)
inference_engine_lp_transformations
${NGRAPH_LIBRARIES})
set(CLDNN_TOP_FOLDER "${IE_MAIN_SOURCE_DIR}/thirdparty/clDNN")
target_include_directories(${TARGET_NAME} PRIVATE

View File

@ -9,20 +9,10 @@
#include <cpp_interfaces/exception2status.hpp>
#include <api/layout.hpp>
using namespace InferenceEngine;
using namespace InferenceEngine::details;
#include "ngraph/type/element_type.hpp"
namespace CLDNNPlugin {
#ifndef NDEBUG
#define THROW_CLDNN_EXCEPTION(desc)\
do { \
InferenceEngineException ex(__FILE__, __LINE__);\
std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \
__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0);
#else
#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc;
#endif // NDEBUG
#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def = 1) {
@ -34,33 +24,57 @@ const auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, i
case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") for clDNN tensor");
default: THROW_IE_EXCEPTION << "Invalid dimensions size(" << dims.size() << ") for clDNN tensor";
}
};
inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
switch (p) {
case Precision::I16:
case Precision::U16:
case Precision::FP32:
case InferenceEngine::Precision::I16:
case InferenceEngine::Precision::U16:
case InferenceEngine::Precision::FP32:
return cldnn::data_types::f32;
case Precision::FP16:
case InferenceEngine::Precision::FP16:
return cldnn::data_types::f16;
case Precision::U8:
case InferenceEngine::Precision::U8:
return cldnn::data_types::u8;
case Precision::I8:
case InferenceEngine::Precision::I8:
return cldnn::data_types::i8;
case Precision::I32:
case InferenceEngine::Precision::I32:
return cldnn::data_types::i32;
case Precision::I64:
case InferenceEngine::Precision::I64:
return cldnn::data_types::i64;
case Precision::BIN:
case InferenceEngine::Precision::BIN:
return cldnn::data_types::bin;
case Precision::BOOL:
case InferenceEngine::Precision::BOOL:
return cldnn::data_types::i8;
default:
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
break;
}
}
// Translates an nGraph element type into the clDNN data type the plugin uses for it.
// Cases are grouped by the resulting clDNN type; an element type with no mapping
// is reported through THROW_IE_EXCEPTION with PARAMETER_MISMATCH.
inline cldnn::data_types DataTypeFromPrecision(ngraph::element::Type t) {
    switch (t) {
    // 16-bit integers share the f32 mapping with native f32.
    case ngraph::element::Type_t::f32:
    case ngraph::element::Type_t::i16:
    case ngraph::element::Type_t::u16:
        return cldnn::data_types::f32;

    case ngraph::element::Type_t::f16:
        return cldnn::data_types::f16;

    case ngraph::element::Type_t::u8:
        return cldnn::data_types::u8;

    // Booleans are mapped to signed 8-bit values.
    case ngraph::element::Type_t::i8:
    case ngraph::element::Type_t::boolean:
        return cldnn::data_types::i8;

    case ngraph::element::Type_t::i32:
        return cldnn::data_types::i32;

    case ngraph::element::Type_t::i64:
        return cldnn::data_types::i64;

    // 1-bit elements map to the clDNN binary type.
    case ngraph::element::Type_t::u1:
        return cldnn::data_types::bin;

    default:
        THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << t.get_type_name() << " precision";
    }
}
@ -81,7 +95,6 @@ inline cldnn::format FormatFromLayout(InferenceEngine::Layout l) {
return cldnn::format::byxf;
default:
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout";
break;
}
}
@ -107,7 +120,6 @@ inline cldnn::format FormatFromTensorDesc(InferenceEngine::TensorDesc desc) {
return cldnn::format::byxf;
default:
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << desc.getLayout() << " layout";
break;
}
}
@ -124,12 +136,11 @@ inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) {
return cldnn::format::nv12;
default:
THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " image layout";
break;
}
}
inline cldnn::format defaultFormatForDims(size_t dimensions) {
inline cldnn::format DefaultFormatForDims(size_t dimensions) {
switch (dimensions) {
case 0:
case 1:
@ -142,7 +153,7 @@ inline cldnn::format defaultFormatForDims(size_t dimensions) {
case 6:
return cldnn::format::bfwzyx;
default:
THROW_CLDNN_EXCEPTION("Unsupported number of dimensions: " << dimensions);
THROW_IE_EXCEPTION << "Unsupported number of dimensions: " << dimensions;
}
return cldnn::format::bfyx; // Should not get here

View File

@ -7,6 +7,7 @@
#include <cldnn/cldnn_config.hpp>
#include "cldnn_config.h"
#include "cpp_interfaces/exception2status.hpp"
#include "details/ie_exception.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "ie_api.h"
#include "file_utils.h"

View File

@ -6,11 +6,6 @@
#include <map>
#include <string>
#include <vector>
#include "ie_blob.h"
#include "cpp/ie_cnn_network.h"
#include "debug_options.h"
#include "cldnn_custom_layer.h"

View File

@ -4,7 +4,6 @@
#include <limits>
#include <algorithm>
#include <string>
#include <map>
#include <vector>
@ -12,62 +11,86 @@
#include <cmath>
#include <tuple>
#include <cctype>
#include <memory>
#include "ie_metric_helpers.hpp"
#include <ie_data.h>
#include <cpp/ie_cnn_network.h>
#include <description_buffer.hpp>
#include <memory>
#include "ie_plugin_config.hpp"
#include "caseless.hpp"
#include <legacy/details/ie_cnn_network_tools.h>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <generic_ie.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <ie_ngraph_utils.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/convert_pad_to_group_conv.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_ti_to_sequences.hpp>
#include <transformations/op_conversions/gru_cell_decomposition.hpp>
#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include <transformations/op_conversions/rnn_cell_decomposition.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/op_conversions/bidirectional_sequences_decomposition.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_prior_to_ie_prior.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_nms_5_to_legacy.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/reshape_fully_connected.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_transformer.h>
#include <low_precision/transformer.hpp>
#include <low_precision/mat_mul.hpp>
#include "cldnn_engine.h"
#include "cldnn_executable_network.h"
#include "cldnn_custom_layer.h"
#include <low_precision/transformer.hpp>
#include <low_precision/mat_mul.hpp>
#ifdef __linux__
# include <dlfcn.h>
#endif
using InferenceEngine::DescriptionBuffer;
using InferenceEngine::TBlob;
using InferenceEngine::Blob;
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
// Expands to a forward declaration of the registration function for one operation,
// named __register_<op_name>_<op_version>.
#define FACTORY_DECLARATION(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version();
// Expands to a call of the registration function declared by FACTORY_DECLARATION.
#define FACTORY_CALL(op_version, op_name) \
__register ## _ ## op_name ## _ ## op_version();
// First inclusion of the primitives list: REGISTER_FACTORY is bound to the declaration
// form, so every entry of the list becomes a function prototype in this namespace.
// NOTE(review): presumably cldnn_primitives_list.hpp consists only of
// REGISTER_FACTORY(version, name) lines - confirm against that header.
#define REGISTER_FACTORY(op_version, op_name) FACTORY_DECLARATION(op_version, op_name)
#include "cldnn_primitives_list.hpp"
#undef REGISTER_FACTORY
// Invokes the registration function of every entry in the primitives list.
// Called from the clDNNEngine constructor.
void clDNNEngine::RegisterPrimitives() {
// Second inclusion of the same list: REGISTER_FACTORY is now bound to the call form,
// so the function body becomes one call per list entry.
#define REGISTER_FACTORY(op_version, op_name) FACTORY_CALL(op_version, op_name)
#include "cldnn_primitives_list.hpp"
#undef REGISTER_FACTORY
}
struct clDNNEngine::impl {
CLDNNPlugin::Config m_config;
};
@ -85,205 +108,197 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::s
return device_info;
}
InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::ICNNNetwork& network, CLDNNPlugin::Config config) const {
std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
bool baselineIsFP16 = false;
// Transformation callback helper for the ConvertReduce{Sum,Mean,Max}ToPooling passes.
//
// T is the reduce operation type to test `node` against (for example ngraph::opset1::ReduceSum).
// Returns true when `node` is a T that the plugin should keep as a native reduce, i.e. the
// pass must be skipped for it. Returns false when `node` is not a T, or when the
// Reshape->Pooling->Reshape form is preferred:
//   * the reduction runs over axis 1 only (the feature axis), or
//   * the output element type is f16 and the first dimension of input 0 is not 1.
template<typename T>
static bool disableReduceDecomposition(const std::shared_ptr<const ngraph::Node> node) {
    const auto op = std::dynamic_pointer_cast<const T>(node);
    if (!op) {
        return false;
    }

    // The axis set holds unique values, so "exactly one axis and it is 1" is the whole check.
    const auto reduction_axes = op->get_reduction_axes().to_vector();
    const bool reduce_along_f = reduction_axes.size() == 1 && reduction_axes.front() == 1;

    // The shape is queried only for f16, exactly as before, so non-f16 nodes are never
    // asked for a static shape here.
    bool fp16_batch_not_1 = false;
    if (op->get_element_type() == ngraph::element::f16) {
        const auto input_shape = op->input(0).get_shape();
        // A rank-0 (scalar) input has no first dimension; indexing [0] on it would read
        // out of bounds, so it is treated as "no batch dimension other than 1".
        fp16_batch_not_1 = !input_shape.empty() && input_shape[0] != 1;
    }

    return !reduce_along_f && !fp16_batch_not_1;
}
if (clonedNetwork->getFunction()) {
const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
// Reshape->Permute->Reshape pattern in theory can change output rank, so this check is added to be sure
// that the following primitives will be handled correctly
// DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
if (auto dtsOp = std::dynamic_pointer_cast<const ::ngraph::opset3::DepthToSpace>(node)) {
return dtsOp->input_value(0).get_shape().size() <= 5lu && dtsOp->input_value(0).get_shape().size() == dtsOp->get_output_shape(0).size();
}
InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const CLDNNPlugin::Config& config) const {
CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
// SpaceToDepth node implementation supports only equal input/output tensors with rank <= 5
if (auto stdOp = std::dynamic_pointer_cast<const ::ngraph::opset3::SpaceToDepth>(node)) {
return stdOp->input_value(0).get_shape().size() <= 5lu && stdOp->input_value(0).get_shape().size() == stdOp->get_output_shape(0).size();
}
// Reduce node implementation with reduce along features performs better with Reshape->Pooling->Reshape pattern
// Reshape->Pooling->Reshape scenario is also more optimal in case when batch > 1 and network precision is FP16
if (auto redOp = std::dynamic_pointer_cast<const ::ngraph::opset1::ReduceMean>(node)) {
auto reduction_axes = redOp->get_reduction_axes().to_vector();
bool reduce_along_f = redOp->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
bool fp16_batch_not_1 = redOp->get_element_type() == ngraph::element::f16 && redOp->input(0).get_shape()[0] != 1;
bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
return can_use_reduce;
}
if (auto redOp = std::dynamic_pointer_cast<const ::ngraph::opset1::ReduceMax>(node)) {
auto reduction_axes = redOp->get_reduction_axes().to_vector();
bool reduce_along_f = redOp->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
bool fp16_batch_not_1 = redOp->get_element_type() == ngraph::element::f16 && redOp->input(0).get_shape()[0] != 1;
bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
return can_use_reduce;
}
if (auto redOp = std::dynamic_pointer_cast<const ::ngraph::opset1::ReduceSum>(node)) {
auto reduction_axes = redOp->get_reduction_axes().to_vector();
bool reduce_along_f = redOp->get_reduction_axes().size() == 1 && std::count(reduction_axes.begin(), reduction_axes.end(), 1) != 0;
bool fp16_batch_not_1 = redOp->get_element_type() == ngraph::element::f16 && redOp->input(0).get_shape()[0] != 1;
bool can_use_reduce = !reduce_along_f && !fp16_batch_not_1;
return can_use_reduce;
}
if (auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(node)) {
return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
}
return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset5::HSigmoid>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset4::HSwish>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset4::ReduceL1>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset4::ReduceL2>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset4::SoftPlus>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset5::LogSoftmax>(node);
};
auto nGraphFunc = clonedNetwork->getFunction();
if (clonedNetwork.getFunction()) {
auto nGraphFunc = clonedNetwork.getFunction();
// Disable shape inference (WA for generic operations)
::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);
bool enableInt8;
{
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
ngraph::pass::Manager manager;
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
const auto& pass_config = manager.get_pass_config();
manager.register_pass<ngraph::pass::InitNodeInfo>();
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::ConvertNMS5ToLegacyMatcher>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
manager.register_pass<ngraph::pass::ConvertGRUSequenceToTensorIterator>();
manager.register_pass<ngraph::pass::ConvertLSTMSequenceToTensorIterator>();
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertTensorIteratorToGRUSequence>();
manager.register_pass<ngraph::pass::ConvertTensorIteratorToLSTMSequence>();
manager.register_pass<ngraph::pass::ConvertTensorIteratorToRNNSequence>();
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ngraph::pass::GRUCellDecomposition>();
manager.register_pass<ngraph::pass::RNNCellDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalGRUSequenceDecomposition>();
manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
manager.register_pass<ngraph::pass::ConvertNMS1ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMS3ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
manager.set_callback(transformations_callback);
std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
{ngraph::element::i64, ngraph::element::i32},
{ngraph::element::u64, ngraph::element::i32},
{ngraph::element::u16, ngraph::element::i32},
{ngraph::element::u32, ngraph::element::i32},
{ngraph::element::boolean, ngraph::element::u8},
};
for (auto & precision : convert_precision_list) {
manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
}
auto pass_config = manager.get_pass_config();
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
// SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
ngraph::pass::ConvertDepthToSpace>(
[](const_node_ptr &node) -> bool {
return node->input_value(0).get_shape().size() <= 5lu &&
node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
});
pass_config->set_callback<ngraph::pass::ConvertBatchToSpace,
ngraph::pass::ConvertSpaceToBatch>(
[](const_node_ptr &node) -> bool {
const auto & rank = node->input(0).get_partial_shape().rank().get_length();
return rank <= 5lu;
});
pass_config->set_callback<ngraph::pass::ConvertReduceSumToPooling>(
[](const_node_ptr &node) -> bool {
return disableReduceDecomposition<ngraph::opset1::ReduceSum>(node);
});
pass_config->set_callback<ngraph::pass::ConvertReduceMeanToPooling>(
[](const_node_ptr &node) -> bool {
return disableReduceDecomposition<ngraph::opset1::ReduceMean>(node);
});
pass_config->set_callback<ngraph::pass::ConvertReduceMaxToPooling>(
[](const_node_ptr &node) -> bool {
return disableReduceDecomposition<ngraph::opset1::ReduceMax>(node);
});
auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
if (const auto &rnn_cell = std::dynamic_pointer_cast<const ngraph::opset4::RNNCell>(node)) {
if (std::dynamic_pointer_cast<const ngraph::op::v0::RNNCell>(node) || std::dynamic_pointer_cast<const ngraph::op::v5::RNNSequence>(node)) {
return false;
} else if (const auto &gru_cell = std::dynamic_pointer_cast<const ngraph::opset4::GRUCell>(
node)) {
} else if (std::dynamic_pointer_cast<const ngraph::op::v3::GRUCell>(node) ||
std::dynamic_pointer_cast<const ngraph::op::v5::GRUSequence>(node)) {
return false;
} else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::opset4::LSTMCell>(
node)) {
return lstm_cell->get_clip() == 0.0f &&
lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
} else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::opset1::LSTMCell>(
node)) {
return lstm_cell_v1->get_clip() == 0.0f &&
lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
} else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::op::v4::LSTMCell>(node)) {
return lstm_cell->get_clip() == 0.0f && lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
} else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::op::v0::LSTMCell>(node)) {
return lstm_cell_v1->get_clip() == 0.0f && lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
} else if (const auto &lstm_sequence = std::dynamic_pointer_cast<const ngraph::op::v5::LSTMSequence>(node)) {
return lstm_sequence->get_clip() == 0.0f && lstm_sequence->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
}
return false;
};
pass_config->set_callback<ngraph::pass::RNNCellDecomposition, ngraph::pass::GRUCellDecomposition,
ngraph::pass::LSTMCellDecomposition>(
[isCellPrimitiveSupported](const_node_ptr &node) -> bool {
return isCellPrimitiveSupported(node);
});
pass_config->set_callback<ngraph::pass::ConvertRNNSequenceToTensorIterator,
ngraph::pass::ConvertGRUSequenceToTensorIterator,
ngraph::pass::ConvertLSTMSequenceToTensorIterator,
ngraph::pass::RNNCellDecomposition,
ngraph::pass::GRUCellDecomposition,
ngraph::pass::LSTMCellDecomposition>(
[isCellPrimitiveSupported](const_node_ptr &node) -> bool {
return isCellPrimitiveSupported(node);
});
pass_config->set_callback<ngraph::pass::ConvertTensorIteratorToRNNSequence,
ngraph::pass::ConvertTensorIteratorToLSTMSequence,
ngraph::pass::ConvertTensorIteratorToGRUSequence>(
[isCellPrimitiveSupported](const_node_ptr &node) -> bool {
if (const auto& ti_op = std::dynamic_pointer_cast<const ngraph::op::TensorIterator>(node)) {
size_t count_rnn = 0;
for (const auto &op : ti_op->get_body()->get_ops())
count_rnn += isCellPrimitiveSupported(op);
return count_rnn != 1;
}
return true;
});
manager.run_passes(nGraphFunc);
enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
if (enableInt8) {
const auto fp16_callback = [&baselineIsFP16](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
if (!baselineIsFP16 && node->get_output_element_type(0) == ngraph::element::f16) {
baselineIsFP16 = true;
ngraph::pass::ConvertTensorIteratorToLSTMSequence,
ngraph::pass::ConvertTensorIteratorToGRUSequence>(
[isCellPrimitiveSupported](const_node_ptr &node) -> bool {
if (const auto& ti_op = std::dynamic_pointer_cast<const ngraph::op::TensorIterator>(node)) {
size_t count_rnn = 0;
for (const auto &op : ti_op->get_body()->get_ops())
count_rnn += isCellPrimitiveSupported(op);
return count_rnn != 1;
}
return true;
};
});
ngraph::pass::Manager conversion_manager;
// [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
conversion_manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
conversion_manager.set_callback(fp16_callback);
conversion_manager.run_passes(nGraphFunc);
}
pass_config->set_callback<ngraph::pass::ConvertNMS1ToNMS5,
ngraph::pass::ConvertNMS3ToNMS5,
ngraph::pass::ConvertNMS4ToNMS5,
ngraph::pass::ConvertNMSToNMSIEInternal>(
[](const_node_ptr &node) -> bool {
return node->input_value(0).get_shape().back() == 4lu &&
node->input_value(0).get_shape().front() == node->input_value(1).get_shape().front() &&
node->input_value(0).get_shape()[1] == node->input_value(1).get_shape().back() &&
node->input_value(0).get_shape().size() == 3lu &&
node->input_value(1).get_shape().size() == 3lu;
});
// List of enabled/disabled transformations
pass_config->disable<ngraph::pass::ConvertGELU>();
pass_config->disable<ngraph::pass::ConvertMod>();
pass_config->disable<ngraph::pass::ConvertShuffleChannels3>();
pass_config->disable<ngraph::pass::HSwishDecomposition>();
pass_config->disable<ngraph::pass::HSigmoidDecomposition>();
pass_config->disable<ngraph::pass::ReduceL1Decomposition>();
pass_config->disable<ngraph::pass::ReduceL2Decomposition>();
pass_config->disable<ngraph::pass::SoftPlusDecomposition>();
pass_config->disable<ngraph::pass::LogSoftmaxDecomposition>();
pass_config->enable<ngraph::pass::ConvertInterpolate1ToInterpolate4>();
manager.run_passes(nGraphFunc);
}
using namespace ngraph::pass::low_precision;
bool enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
if (enableInt8) {
auto params = LayerTransformation::Params(
true, // updatePrecisions
LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
true); // supportAsymmetricQuantization
using namespace ngraph::pass::low_precision;
ngraph::pass::Manager conversion_manager;
// [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
conversion_manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
conversion_manager.run_passes(nGraphFunc);
auto params = LayerTransformation::Params(true, // updatePrecisions
LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
true); // supportAsymmetricQuantization
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false)));
transformer.transform(nGraphFunc);
}
const auto reshape_fc_callback = [](const std::shared_ptr<const ::ngraph::Node>& node) -> bool {
return node->input_value(0).get_shape().size() <= 3lu;
};
{
ngraph::pass::Manager manager = ngraph::pass::Manager();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
ngraph::pass::Manager manager;
// This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation
// TODO: check why we have these reshapes
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.set_callback(transformations_callback);
auto pass_config = manager.get_pass_config();
pass_config->set_callback<ngraph::pass::ReshapeFullyConnected>(reshape_fc_callback);
manager.run_passes(nGraphFunc);
}
clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
}
auto implNetwork = std::dynamic_pointer_cast<InferenceEngine::details::CNNNetworkImpl>(clonedNetwork);
if (implNetwork) {
// valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
ConstTransformer transformator(implNetwork.get());
transformator.fullTrim();
}
if (baselineIsFP16) {
// [WA part1] Store 'lpt_back_to_fp16' flag to convert FP32 operations to original FP16 after LPT
InputsDataMap inputsMap;
clonedNetwork->getInputsInfo(inputsMap);
if (!inputsMap.empty()) {
auto input0 = getInputTo(inputsMap.begin()->second->getInputData());
input0.begin()->second->params["lpt_back_to_fp16"];
}
}
return clonedNetwork;
}
clDNNEngine::clDNNEngine() : m_defaultContext(nullptr) {
_pluginName = "GPU";
_impl = std::make_shared<impl>();
RegisterPrimitives();
// try loading clDNN engine and get info from it
{
cldnn::device_query device_query;
@ -333,6 +348,15 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) {
}
};
// Prepares `conf` for loading or querying `network` with the user-supplied `params`.
//
// Order matters: the int8 default is derived from the device capabilities first and the
// user parameters are applied afterwards through UpdateFromMap. When dynamic batching
// is enabled after that, the maximum dynamic batch is taken from the network batch size.
void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const {
    const auto deviceInfo = GetDeviceInfo(params);
    const bool hasInt8Support = deviceInfo.supports_imad || deviceInfo.supports_immad;

    conf.enableInt8 = hasInt8Support;
    conf.UpdateFromMap(params);

    if (!conf.enableDynamicBatch) {
        return;
    }
    conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
}
ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network,
const std::map<std::string, std::string> &config) {
// verification of supported input
@ -340,13 +364,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
check_inputs(_networkInputs);
CLDNNPlugin::Config conf = _impl->m_config;
auto device_info = GetDeviceInfo(config);
conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
conf.UpdateFromMap(config);
if (conf.enableDynamicBatch) {
conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
}
UpdateConfig(conf, network, config);
CLDNNRemoteCLContext::Ptr context;
@ -379,7 +397,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
context = m_defaultContext;
InferenceEngine::CNNNetwork transformedNetwork(CloneAndTransformNetwork(network, conf));
auto transformedNetwork = CloneAndTransformNetwork(network, conf);
return std::make_shared<CLDNNExecNetwork>(transformedNetwork, context, conf);
}
@ -395,15 +413,9 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
}
CLDNNPlugin::Config conf = getContextImpl(casted)->GetConfig();
auto device_info = GetDeviceInfo(config);
conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
conf.UpdateFromMap(config);
UpdateConfig(conf, network, config);
if (conf.enableDynamicBatch) {
conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
}
InferenceEngine::CNNNetwork transformedNetwork(CloneAndTransformNetwork(network, conf));
auto transformedNetwork = CloneAndTransformNetwork(network, conf);
return std::make_shared<CLDNNExecNetwork>(transformedNetwork, casted, conf);
}
@ -440,85 +452,101 @@ void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) {
QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
QueryNetworkResult res;
GetDeviceInfo(config); // Verify device id
CLDNNPlugin::Config conf = _impl->m_config;
UpdateConfig(conf, network, config);
Program prog;
auto function = network.getFunction();
if (function != nullptr) {
std::unordered_set<std::string> originalOps;
for (auto&& node : function->get_ops()) {
originalOps.emplace(node->get_friendly_name());
if (function == nullptr) {
THROW_IE_EXCEPTION << "CNNetworkImpl representation is not supported anymore";
}
std::unordered_set<std::string> originalOpNames;
auto originalOps = function->get_ops();
for (auto&& node : originalOps) {
originalOpNames.emplace(node->get_friendly_name());
}
auto clonedNetwork = CloneAndTransformNetwork(network, conf);
auto ops = clonedNetwork.getFunction()->get_ordered_ops();
std::unordered_set<std::string> supported;
std::unordered_set<std::string> unsupported;
std::unordered_set<std::string> splitNames;
std::unordered_set<std::string> concatNames;
std::unordered_set<std::string> constantsNames;
std::unordered_set<std::string> depLayerNames;
std::vector<std::shared_ptr<ngraph::Node>> splits;
std::vector<std::shared_ptr<ngraph::Node>> concats;
std::vector<std::shared_ptr<ngraph::Node>> constants;
std::vector<std::shared_ptr<ngraph::Node>> nextLayerDependent;
auto layerIsSupported = [&](std::shared_ptr<ngraph::Node> node) {
if (ngraph::is_type<const ngraph::op::v0::DetectionOutput>(node) ||
ngraph::is_type<const ngraph::op::v0::PriorBox>(node) ||
ngraph::is_type<const ngraph::op::v0::PriorBoxClustered>(node) ||
ngraph::is_type<const ngraph::op::v0::Proposal>(node)) {
return false;
} else if (ngraph::is_type<const ngraph::op::v1::Split>(node)) {
splitNames.emplace(node->get_friendly_name());
splits.push_back(node);
return false;
} else if (ngraph::is_type<const ngraph::op::v0::Concat>(node)) {
concatNames.emplace(node->get_friendly_name());
concats.push_back(node);
return false;
} else if (ngraph::is_type<const ngraph::op::v1::Reshape>(node) ||
ngraph::is_type<const ngraph::op::v0::Squeeze>(node) ||
ngraph::is_type<const ngraph::op::v0::Unsqueeze>(node) ||
ngraph::is_type<const ngraph::op::v1::Transpose>(node)) {
depLayerNames.emplace(node->get_friendly_name());
nextLayerDependent.push_back(node);
return false;
} else if (ngraph::is_type<const ngraph::op::v0::Constant>(node)) {
constantsNames.emplace(node->get_friendly_name());
constants.push_back(node);
return false;
} else if (prog.IsOpSupported(network, node) &&
!ngraph::op::is_parameter(node) &&
!ngraph::op::is_output(node)) {
return true;
} else {
return false;
}
auto clonedNetwork = CloneAndTransformNetwork(network, _impl->m_config);
std::unordered_set<std::string> supported;
std::unordered_set<std::string> unsupported;
};
std::unordered_set<std::string> splitNames;
std::unordered_set<std::string> concatNames;
std::unordered_set<std::string> depLayerNames;
std::vector<std::shared_ptr<ngraph::Node>> splits;
std::vector<std::shared_ptr<ngraph::Node>> concats;
std::vector<std::shared_ptr<ngraph::Node>> nextLayerDependent;
for (InferenceEngine::details::CNNNetworkIterator itLayer{clonedNetwork.get()};
itLayer != InferenceEngine::details::CNNNetworkIterator();
itLayer++) {
auto layerIsSupported = [&] {
auto node = (*itLayer)->getNode();
if (std::dynamic_pointer_cast<const ::ngraph::opset3::DetectionOutput>(node) != nullptr ||
std::dynamic_pointer_cast<const ::ngraph::opset3::PriorBox>(node) != nullptr ||
std::dynamic_pointer_cast<const ::ngraph::opset3::PriorBoxClustered>(node) != nullptr ||
std::dynamic_pointer_cast<const ::ngraph::opset3::Proposal>(node) != nullptr) {
return false;
} else if (std::dynamic_pointer_cast<const ::ngraph::opset3::Split>(node) != nullptr) {
splitNames.emplace(node->get_friendly_name());
splits.push_back(node);
return false;
} else if (std::dynamic_pointer_cast<const ::ngraph::opset3::Concat>(node) != nullptr) {
concatNames.emplace(node->get_friendly_name());
concats.push_back(node);
return false;
} else if (std::dynamic_pointer_cast<const ::ngraph::opset3::Reshape>(node) != nullptr ||
std::dynamic_pointer_cast<const ::ngraph::opset3::Squeeze>(node) != nullptr ||
std::dynamic_pointer_cast<const ::ngraph::opset3::Unsqueeze>(node) != nullptr ||
std::dynamic_pointer_cast<const ::ngraph::opset3::Transpose>(node) != nullptr ||
ngraph::op::is_constant(node)) {
depLayerNames.emplace(node->get_friendly_name());
nextLayerDependent.push_back(node);
return false;
} else if (CLDNNGraph::IsLayerSupported((*itLayer)->type)) {
return true;
// Get ops after transformations and check if it's supported
// Transformations might lead to the situation when single node is merged to multiple operations,
// so we mark original op as supported only if all nodes that it was merged into are supported
for (auto&& op : ops) {
for (auto&& fusedLayerName : ngraph::getFusedNamesVector(op)) {
if (InferenceEngine::details::contains(originalOpNames, fusedLayerName)) {
if (layerIsSupported(op)) {
supported.emplace(fusedLayerName);
} else {
return false;
}
}();
const auto fusedNode = (*itLayer)->getNode();
if (fusedNode == nullptr) {
// skip layers completely generated by IR transformation
continue;
}
for (auto&& fusedLayerName : ngraph::getFusedNamesVector(fusedNode)) {
if (InferenceEngine::details::contains(originalOps, fusedLayerName)) {
if (layerIsSupported) {
supported.emplace(fusedLayerName);
} else {
unsupported.emplace(fusedLayerName);
}
unsupported.emplace(fusedLayerName);
}
}
}
}
for (auto&& layerName : supported) {
if (InferenceEngine::details::contains(unsupported, layerName)) {
supported.erase(layerName);
}
for (auto&& layerName : supported) {
if (InferenceEngine::details::contains(unsupported, layerName)) {
supported.erase(layerName);
}
unsupported.clear();
}
unsupported.clear();
for (const auto & split : splits) {
bool is_supported = true;
const auto outputs = split->outputs();
for (const auto& output : outputs) {
const auto& name = output.get_node()->get_friendly_name();
// Check set of heuristics to produce more efficient hetero sub-graph. Note: checks order is important.
// 1. Split is marked as supported when all output ops can be offloaded to GPU
for (const auto & op : splits) {
bool is_supported = true;
for (size_t i = 0; i < op->get_output_size(); i++) {
auto outTensors = op->get_output_target_inputs(i);
for (auto& t : outTensors) {
auto output = t.get_node();
const auto& name = output->get_friendly_name();
if (!InferenceEngine::details::contains(supported, name) &&
!InferenceEngine::details::contains(depLayerNames, name) &&
!InferenceEngine::details::contains(concatNames, name) &&
@ -527,69 +555,97 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
break;
}
}
if (is_supported) {
supported.emplace(split->get_friendly_name());
}
if (is_supported) {
supported.emplace(op->get_friendly_name());
}
}
// 2. Concat is marked as supported when all inputs can be offloaded to GPU
for (const auto& op : concats) {
bool is_supported = true;
for (size_t i = 0; i < op->get_input_size(); i++) {
auto input = op->get_input_node_shared_ptr(i);
const auto& name = input->get_friendly_name();
if (!InferenceEngine::details::contains(supported, name) &&
!InferenceEngine::details::contains(depLayerNames, name) &&
!InferenceEngine::details::contains(concatNames, name)) {
is_supported = false;
break;
}
}
if (is_supported) {
supported.emplace(op->get_friendly_name());
}
}
for (const auto& concat : concats) {
bool is_supported = true;
const auto inputs = concat->inputs();
for (const auto& input : inputs) {
const auto& name = input.get_node()->get_friendly_name();
if (!InferenceEngine::details::contains(supported, name) &&
!InferenceEngine::details::contains(depLayerNames, name) &&
!InferenceEngine::details::contains(concatNames, name)) {
is_supported = false;
break;
}
}
if (is_supported) {
supported.emplace(concat->get_friendly_name());
// 3. Some layers are marked as supported when all inputs and outputs can be offloaded to GPU
for (const auto& op : nextLayerDependent) {
bool is_supported = true;
// both inputs and output should be GPU to remain on GPU
for (size_t i = 0; i < op->get_input_size(); i++) {
auto input = op->get_input_node_shared_ptr(i);
const auto& name = input->get_friendly_name();
// All inputs must be supported or be a constant
if (!InferenceEngine::details::contains(supported, name) && !InferenceEngine::details::contains(constantsNames, name)) {
is_supported = false;
break;
}
}
for (const auto& cnl : nextLayerDependent) {
bool is_supported = true;
// both inputs and output should be GPU to remain on GPU
const auto inputs = cnl->inputs();
for (const auto& input : inputs) {
const auto& name = input.get_node()->get_friendly_name();
for (size_t i = 0; i < op->get_output_size(); i++) {
auto outTensors = op->get_output_target_inputs(i);
for (auto& t : outTensors) {
auto output = t.get_node();
const auto& name = output->get_friendly_name();
if (!InferenceEngine::details::contains(supported, name)) {
is_supported = false;
break;
}
}
const auto outputs = cnl->outputs();
for (const auto& output : outputs) {
const auto& name = output.get_node()->get_friendly_name();
}
if (is_supported) {
supported.emplace(op->get_friendly_name());
}
}
// 4. Constants are marked as supported when all outputs can be offloaded to GPU
for (const auto& op : constants) {
bool is_supported = true;
for (size_t i = 0; i < op->get_output_size(); i++) {
auto outTensors = op->get_output_target_inputs(i);
for (auto& t : outTensors) {
auto output = t.get_node();
const auto& name = output->get_friendly_name();
if (!InferenceEngine::details::contains(supported, name)) {
is_supported = false;
break;
}
}
if (is_supported) {
supported.emplace(cnl->get_friendly_name());
}
if (is_supported) {
supported.emplace(op->get_friendly_name());
}
}
// Mark original constants/parameters/results ops as supported for each supported operation
// since rt_info doesn't contain names of constant that are removed during constant folding
for (auto&& node : originalOps) {
if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
for (auto&& inputNodeOutput : node->input_values()) {
if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) {
supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
}
}
for (auto&& outputs : node->outputs()) {
for (auto&& outputNodeInput : outputs.get_target_inputs()) {
if (ngraph::op::is_output(outputNodeInput.get_node())) {
supported.emplace(outputNodeInput.get_node()->get_friendly_name());
}
}
}
}
for (auto&& node : function->get_ops()) {
if (InferenceEngine::details::contains(supported, node->get_friendly_name())) {
for (auto&& inputNodeOutput : node->input_values()) {
if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) {
supported.emplace(inputNodeOutput.get_node()->get_friendly_name());
}
}
for (auto&& outputs : node->outputs()) {
for (auto&& outputNodeInput : outputs.get_target_inputs()) {
if (ngraph::op::is_output(outputNodeInput.get_node())) {
supported.emplace(outputNodeInput.get_node()->get_friendly_name());
}
}
}
}
if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) {
if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) {
supported.erase(node->get_friendly_name());
}
@ -598,69 +654,10 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const CNNNetwork& network,
supported.erase(node->get_friendly_name());
}
}
}
}
for (auto&& layerName : supported) {
res.supportedLayersMap.emplace(layerName, GetName());
}
} else {
std::vector<CNNLayer::Ptr> concats;
std::vector<CNNLayer::Ptr> nextLayerDependent;
std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
for (auto layer : sortedLayers) {
if (CaselessEq<std::string>()(layer->type, "DetectionOutput")) {
} else if (CaselessEq<std::string>()(layer->type, "PriorBox")) {
} else if (CaselessEq<std::string>()(layer->type, "Proposal")) {
} else if (CaselessEq<std::string>()(layer->type, "SimplerNMS")) {
} else if (CaselessEq<std::string>()(layer->type, "Concat")) {
concats.push_back(layer);
} else if (CaselessEq<std::string>()(layer->type, "reshape")) {
nextLayerDependent.push_back(layer);
} else if (CaselessEq<std::string>()(layer->type, "permute")) {
nextLayerDependent.push_back(layer);
} else if (CaselessEq<std::string>()(layer->type, "Const")) {
nextLayerDependent.push_back(layer);
} else if (CLDNNGraph::IsLayerSupported(layer->type)) {
res.supportedLayersMap.insert({ layer->name, GetName() });
}
}
// evaluation of concats - if all parent layers are supported, only in this case we
// will mark concat as a supported for GPU
for (const auto& concat : concats) {
// take all parrents.
bool supported = true;
for (DataWeakPtr insData : concat->insData) {
CNNLayerPtr prev = getCreatorLayer(insData.lock()).lock();
// verify if previous layer is not supported or if it in the list of not defined layers yet
// not defined layers are treated as layers which will be assigned to GPU if next layer is assigned to GPU
if (res.supportedLayersMap.find(prev->name) == res.supportedLayersMap.end()
&& std::find(nextLayerDependent.begin(), nextLayerDependent.end(), prev) == nextLayerDependent.end()) {
supported = false;
}
}
if (supported) {
res.supportedLayersMap.insert({ concat->name, GetName() });
}
}
// evaluation of constant blobs - if all consumers are on GPU,
// then leave it on GPU, else - move to other device
for (auto cnl = nextLayerDependent.rbegin();
cnl != nextLayerDependent.rend();
cnl++) {
bool supported = true;
for (DataPtr out : (*cnl)->outData) {
for (auto ol : getInputTo(out)) {
if (res.supportedLayersMap.find(ol.second->name) == res.supportedLayersMap.end()) {
supported = false;
}
}
}
if (supported) {
res.supportedLayersMap.insert({ (*cnl)->name, GetName() });
}
}
for (auto&& layerName : supported) {
res.supportedLayersMap.emplace(layerName, GetName());
}
return res;

View File

@ -16,7 +16,7 @@ namespace CLDNNPlugin {
using CLDNNCustomLayerPtr = std::shared_ptr<class CLDNNCustomLayer>;
class clDNNEngine : public InferenceEngine::InferencePluginInternal,
public gpu::details::param_map_obj_getter {
public InferenceEngine::gpu::details::param_map_obj_getter {
struct impl;
std::shared_ptr<impl> _impl;
@ -27,8 +27,11 @@ class clDNNEngine : public InferenceEngine::InferencePluginInternal,
CLDNNRemoteCLContext::Ptr m_defaultContext;
cldnn::device_info GetDeviceInfo(const std::map<std::string, std::string> &config) const;
InferenceEngine::ICNNNetwork::Ptr CloneAndTransformNetwork(const InferenceEngine::ICNNNetwork& network,
CLDNNPlugin::Config config) const;
InferenceEngine::CNNNetwork CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network,
const CLDNNPlugin::Config& config) const;
void RegisterPrimitives();
void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &params) const;
public:
clDNNEngine();
@ -46,7 +49,7 @@ public:
const std::map<std::string, std::string>& config) const override;
InferenceEngine::RemoteContext::Ptr CreateContext(const InferenceEngine::ParamMap& params) override;
InferenceEngine::RemoteContext::Ptr GetDefaultContext(const ParamMap& params) override;
InferenceEngine::RemoteContext::Ptr GetDefaultContext(const InferenceEngine::ParamMap& params) override;
};
}; // namespace CLDNNPlugin

View File

@ -16,8 +16,6 @@
#include <description_buffer.hpp>
#include <cldnn/cldnn_config.hpp>
#include <legacy/graph_tools.hpp>
#include <legacy/net_pass.h>
#include "cldnn_infer_request.h"
#include <threading/ie_executor_manager.hpp>
#include "cldnn_async_infer_request.h"

View File

@ -12,7 +12,6 @@
#include <utility>
#include "ie_blob.h"
#include "cpp/ie_cnn_network.h"
#include "debug_options.h"
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_graph.h"
#include "cldnn_config.h"
@ -24,7 +23,7 @@ class CLDNNExecNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefa
public:
typedef std::shared_ptr<CLDNNExecNetwork> Ptr;
CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, RemoteContext::Ptr context, Config config);
CLDNNExecNetwork(InferenceEngine::CNNNetwork &network, InferenceEngine::RemoteContext::Ptr context, Config config);
InferenceEngine::CNNNetwork GetExecGraphInfo() override;
InferenceEngine::IInferRequest::Ptr CreateInferRequest() override;
@ -33,11 +32,10 @@ public:
InferenceEngine::Parameter GetMetric(const std::string &name) const override;
InferenceEngine::Parameter GetConfig(const std::string &name) const override;
RemoteContext::Ptr GetContext() const override;
InferenceEngine::RemoteContext::Ptr GetContext() const override;
std::vector<std::shared_ptr<CLDNNGraph>> m_graphs;
gpu::ClContext::Ptr m_context;
InferenceEngine::gpu::ClContext::Ptr m_context;
Config m_config;
InferenceEngine::ITaskExecutor::Ptr m_taskExecutor;
};

View File

@ -17,8 +17,6 @@
#include "simple_math.h"
#include <description_buffer.hpp>
#include <cldnn/cldnn_config.hpp>
#include <legacy/graph_tools.hpp>
#include <legacy/net_pass.h>
#include "cldnn_infer_request.h"
#include <threading/ie_executor_manager.hpp>
#include <fstream>
@ -69,12 +67,12 @@ void CLDNNGraph::Build() {
if (GetMaxDynamicBatchSize() > 1) {
int m_bv_sz = m_program->GetMaxBatchSizeForSingleProgram();
for (int b = m_bv_sz - 1; b >= 0; b--) {
auto network = BuildNetwork(m_program->getCompiledProgram(b));
auto network = BuildNetwork(m_program->GetCompiledProgram(b));
m_networks.insert(m_networks.begin(), network);
GetEngine()->release_pending_memory(network->get_id());
}
} else {
auto network = BuildNetwork(m_program->getCompiledProgram());
auto network = BuildNetwork(m_program->GetCompiledProgram());
m_networks.emplace_back(network);
GetEngine()->release_pending_memory(network->get_id());
}
@ -131,6 +129,7 @@ InferenceEngine::CNNNetwork CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::ve
}
};
// TODO: Adjust output layer names to be aligned with ngraph and add new ops
auto to_IE_type_name = [](const std::string& cldnn_name) -> std::string{
static std::map<std::string, std::string> type_n2l {
{ "activation", "Activation" },
@ -748,6 +747,9 @@ std::string CLDNNGraph::MapOutputName(std::string outName) const {
auto allPrimitiveIds = GetNetwork()->get_all_primitives();
// Find correct output ID. Start with name stored in IR.
if (primitiveIDs.find(outName) == primitiveIDs.end()) {
THROW_IE_EXCEPTION << "output with name " << outName << " was not found in primitiveIDs";
}
std::string outputID = primitiveIDs.at(outName);
while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) {
// If current ID isn't found in cldnn network outputs, get previous primitive id and try again.

View File

@ -16,17 +16,10 @@
#include <utility>
#include "ie_blob.h"
#include "cpp/ie_cnn_network.h"
#include "debug_options.h"
#include <api/network.hpp>
#include <api/memory.hpp>
#include <api/primitive.hpp>
#include <api/topology.hpp>
#include <api/pooling.hpp>
#include <api/eltwise.hpp>
#include <api/concatenation.hpp>
#include <api/detection_output.hpp>
#include <api/softmax.hpp>
#include <api/resample.hpp>
#include <cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp>
#include "cldnn_custom_layer.h"
#include "cldnn_config.h"
@ -39,24 +32,20 @@ class CLDNNGraph {
public:
typedef std::shared_ptr<CLDNNGraph> Ptr;
CLDNNGraph(InferenceEngine::CNNNetwork& network, gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
CLDNNGraph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id = 0);
InferenceEngine::CNNNetwork GetExecGraphInfo();
bool IsLoaded() const;
static bool IsLayerSupported(const std::string& type) {
return Program::LayerTypeFromStr(type) != Program::NO_TYPE;
}
void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfMap) const;
void UpdatePerfStatistics();
const Config& getConfig() const { return m_config; }
gpu::ClContext::Ptr GetContext() { return m_context; }
InferenceEngine::gpu::ClContext::Ptr GetContext() { return m_context; }
std::shared_ptr<const cldnn::engine> GetEngine() const { return getContextImpl(m_context)->GetEngine(); }
int GetMaxDynamicBatchSize() const { return getConfig().max_dynamic_batch; }
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->getInputLayouts(); }
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
size_t GetNetworksCount() const { return m_networks.size(); }
std::shared_ptr<cldnn::network> GetNetwork(size_t idx = 0) const;
InferenceEngine::SizeVector GetOutputSize(std::string outName) const;
@ -67,7 +56,7 @@ protected:
std::string m_networkName;
Config m_config;
gpu::ClContext::Ptr m_context;
InferenceEngine::gpu::ClContext::Ptr m_context;
std::vector<std::shared_ptr<cldnn::network>> m_networks;
std::map<std::string, cldnn::primitive_id> primitiveIDs;
std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;

View File

@ -273,7 +273,7 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
cldnn::primitive_id internalName = "input:" + inputName;
cldnn::primitive_id internalName = "parameter:" + inputName;
auto locked = inputBlob.cbuffer();
switch (inputBlob.getTensorDesc().getPrecision()) {
case Precision::FP32: {
@ -562,6 +562,7 @@ void CLDNNInferRequest::SetBlob(const char *name, const Blob::Ptr &data) {
}
void CLDNNInferRequest::AllocateInputs() {
auto inputLayouts = m_graph->GetInputLayouts();
// allocate inputs
for (auto& ni : _networkInputs) {
std::string name = ni.first;
@ -572,8 +573,14 @@ void CLDNNInferRequest::AllocateInputs() {
cldnn::primitive_id YName(name + "_Y");
cldnn::primitive_id UVName(name + "_UV");
input_alloc(YName, m_graph->GetInputLayouts().at(YName));
input_alloc(UVName, m_graph->GetInputLayouts().at(UVName));
if (inputLayouts.find(YName) == inputLayouts.end()) {
THROW_IE_EXCEPTION << "Input layout for " << YName << " is not found";
}
if (inputLayouts.find(UVName) == inputLayouts.end()) {
THROW_IE_EXCEPTION << "Input layout for " << UVName << " is not found";
}
input_alloc(YName, inputLayouts.at(YName));
input_alloc(UVName, inputLayouts.at(UVName));
size_t height = desc.getDims()[2], width = desc.getDims()[3];
cldnn::pointer<uint8_t> input_mem_ptr_Y = inputsMemory.at(YName).pointer<uint8_t>();
@ -586,7 +593,10 @@ void CLDNNInferRequest::AllocateInputs() {
_inputs[name] = make_shared_blob<NV12Blob>(blobY, blobUV);
} else {
cldnn::layout layout = m_graph->GetInputLayouts().at(name);
if (inputLayouts.find(name) == inputLayouts.end()) {
THROW_IE_EXCEPTION << "Input layout for " << name << " is not found";
}
cldnn::layout layout = inputLayouts.at(name);
input_alloc(name, layout);
cldnn::pointer<uint8_t> mem_ptr = inputsMemory.at(name).pointer<uint8_t>();
_inputs[name] = createInputBlob(desc, mem_ptr.data());
@ -907,7 +917,7 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size());
};
cldnn::primitive_id internalName = "input:" + inputName;
cldnn::primitive_id internalName = "parameter:" + inputName;
const cldnn::memory& memory = inputsMemory.at(inputName);
auto _nw_ptr = m_graph->GetNetwork();
auto prec = inputBlob.getTensorDesc().getPrecision();

View File

@ -1,585 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <string>
#include <vector>
#include <sstream>
#include <utility>
#include <api/cldnn.hpp>
#include <api/data.hpp>
#include <api/mutable_data.hpp>
#include <api/reorder.hpp>
#include <api/fully_connected.hpp>
#include <api/concatenation.hpp>
#include <api/reshape.hpp>
#include <api/permute.hpp>
#include <api/split.hpp>
#include <api/crop.hpp>
#include <api/reverse_sequence.hpp>
#include <api/lstm.hpp>
#include <api/lstm_dynamic.hpp>
#include "cldnn_common_utils.h"
#include "cldnn_program.h"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
namespace CLDNNPlugin {
// Formats an index as a decimal string, left-padded with '0' to at least five
// characters (e.g. 7 -> "00007"). Values that need more than five digits are
// returned unpadded and untruncated.
//
// Implemented with std::to_string rather than stream manipulators so that the
// function only depends on <string> (this file does not include <iomanip>) and
// the produced id does not depend on the global locale's digit grouping.
std::string get_string_id(size_t i) {
    constexpr size_t kMinWidth = 5;
    std::string id = std::to_string(i);
    if (id.size() < kMinWidth) {
        // Pad on the left so ids keep their numeric order when compared as strings.
        id.insert(id.begin(), kMinWidth - id.size(), '0');
    }
    return id;
}
// Translates a single LSTMCell CNN layer into a chain of cldnn primitives and
// adds them to `topology`.
//
// The generated sub-graph is:
//   input / hidden / cell  -> reshape + reorder (bfyx)
//   [input, hidden]        -> concatenation along x
//   concat                 -> fully_connected (weights, optional bias)
//   fully_connected        -> reshape + reorder -> lstm_elt (fizo weights order)
//   lstm_elt               -> two crops: hidden state (feature offset 0) and
//                             cell state (feature offset 1)
//
// Every created primitive is registered with the profiler, and the hidden/cell
// output ids are recorded in `primitiveIDs`.
//
// Throws (via THROW_IE_EXCEPTION) when the layer has fewer than three inputs or
// two outputs, when an input is missing, when the input ranks are not 2, when
// the first output has no dimensions, or when the layer carries no weights.
void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
    int lstm_batch_size, lstm_input_size, lstm_hidden_size;
    bool hasBias = false;
    auto inputPrimitives = GetPrevLayersPrimitives(layer);

    std::string layerName = layer_type_name_ID(layer);
    cldnn::primitive_id weightID = layerName + m_weightsTag;
    cldnn::primitive_id biasID = layerName + m_biasesTag;

    /* check incoming CNN layer and setup required variables */
    {
        // insData[0..2] and outData[0..1] are indexed below; make sure they exist first.
        if (layer->insData.size() < 3 || layer->outData.size() < 2)
            THROW_IE_EXCEPTION << "Wrong number of inputs/outputs for LSTMCell layer " << layer->name;

        auto in_data0 = layer->insData[0].lock();
        if (!in_data0)
            THROW_IE_EXCEPTION << "Missing first input for LSTMCell layer " << layer->name;

        auto in_data1 = layer->insData[1].lock();
        if (!in_data1)
            THROW_IE_EXCEPTION << "Missing second input for LSTMCell layer " << layer->name;

        auto in_data2 = layer->insData[2].lock();
        if (!in_data2)
            THROW_IE_EXCEPTION << "Missing third input for LSTMCell layer " << layer->name;

        const auto in_dims0 = in_data0->getTensorDesc().getDims();
        const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims();

        // Validate ranks before any dimension is read: back()/at(size - 2) on a
        // shorter vector would be undefined behaviour or throw an unrelated exception.
        if (in_dims0.size() != 2 ||
            in_data1->getTensorDesc().getDims().size() != 2 ||
            in_data2->getTensorDesc().getDims().size() != 2)
            THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name;

        if (out_dims0.empty())
            THROW_IE_EXCEPTION << "Wrong output shape for LSTMCell Layer " << layer->name;

        lstm_input_size = in_dims0.back();
        lstm_batch_size = in_dims0.at(in_dims0.size()-2);
        lstm_hidden_size = out_dims0.back();
    }

    /* Prepare weight/bias memory primitives */
    {
        auto wLayer = as<InferenceEngine::WeightableLayer *>(layer);
        if (wLayer == nullptr || wLayer->_weights == nullptr)
            THROW_IE_EXCEPTION << "Missing weights for LSTMCell layer " << layer->name;

        auto pWeightsBlob = wLayer->_weights;
        cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(4 * lstm_hidden_size), cldnn::feature(1), cldnn::spatial(lstm_input_size + lstm_hidden_size, 1));
        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()), m_defaultFormat, wTensor);
        weightID = CreatePrimitiveFromBlob(topology, weightID, pWeightsBlob, WLayout);

        /* create bias memory primitive */
        auto pBiasBlob = wLayer->_biases;
        if (pBiasBlob != nullptr) {
            cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), m_defaultFormat, bTensor);
            biasID = CreatePrimitiveFromBlob(topology, biasID, pBiasBlob, BLayout);
            hasBias = true;
        }
    }

    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
    cldnn::primitive_id permuteID = layerName + "_inputReorder";
    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
    cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder";
    cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape";
    cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder";
    cldnn::primitive_id concatID = layerName + "_inputConcat";

    // LSTM primitive works with single precision for all in/out/weights tensors
    auto lstmPrecision = layer->outData[0]->getPrecision();

    cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
    cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, inputShape);
    cldnn::layout hiddenLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, hiddenStateShape);

    // Bring the data input to the shape/layout used by the rest of the chain.
    topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
    topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));

    AddInnerPrimitiveToProfiler(inReshapeID, layer->name, layer);
    AddInnerPrimitiveToProfiler(permuteID, layer->name, layer);

    // "_1" suffix is the hidden state input, "_2" is the cell state input.
    std::string hiddenInResh = inHiddenReshapeID + "_1";
    std::string hiddenInStr = inHiddenReorderID + "_1";
    std::string cellInResh = inHiddenReshapeID + "_2";
    std::string cellInStr = inHiddenReorderID + "_2";
    topology.add(cldnn::reshape(hiddenInResh, inputPrimitives[1], hiddenStateShape));
    topology.add(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
    topology.add(cldnn::reshape(cellInResh, inputPrimitives[2], hiddenStateShape));
    topology.add(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
    topology.add(cldnn::concatenation(concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));

    AddInnerPrimitiveToProfiler(hiddenInResh, layer->name, layer);
    AddInnerPrimitiveToProfiler(hiddenInStr, layer->name, layer);
    AddInnerPrimitiveToProfiler(cellInResh, layer->name, layer);
    AddInnerPrimitiveToProfiler(cellInStr, layer->name, layer);
    AddInnerPrimitiveToProfiler(concatID, layer->name, layer);

    cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
    cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, gemmSz);
    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
    cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};

    std::string lstm_fc_id = layerName + "_fully_connected";
    std::string lstm_elt_id = layerName + "_lstm_elt";

    // An empty bias id is passed when the layer has no bias blob.
    topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : ""));
    topology.add(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
    topology.add(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
    topology.add(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
                                 0, 0, {}, {}, cldnn::lstm_weights_order::fizo));

    AddInnerPrimitiveToProfiler(lstm_fc_id, layer->name, layer);
    AddInnerPrimitiveToProfiler(gemmReshapeID, layer->name, layer);
    AddInnerPrimitiveToProfiler(gemmReorderID, layer->name, layer);
    AddInnerPrimitiveToProfiler(lstm_elt_id, layer->name, layer);

    // Hidden state is cropped from lstm_elt at offset {0, 0, 0, 0}.
    cldnn::primitive_id outputHiddenID = layerName;
    topology.add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
    AddInnerPrimitiveToProfiler(outputHiddenID, layer->name, layer);

    // Cell state is cropped from lstm_elt at offset {0, 1, 0, 0}.
    cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
    topology.add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
    AddInnerPrimitiveToProfiler(outputCellID, layer->name, layer);

    // output primitive IDs
    primitiveIDs[outputHiddenID] = outputHiddenID;                                // LSTMCell:LSTMCell - "concat hidden"
    primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = outputHiddenID;   // LSTMCell:LSTMCell:0 - hidden state
    primitiveIDs[outputCellID] = outputCellID;                                // LSTMCell:LSTMCell:1 - cell state

    AddPrimitiveToProfiler(layerName, layer, outputHiddenID);
}
void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size;
bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true;
auto inputPrimitives = GetPrevLayersPrimitives(layer);
std::string layerName = layer_type_name_ID(layer);
cldnn::primitive_id weightID = layerName + m_weightsTag;
cldnn::primitive_id biasID = layerName + m_biasesTag;
auto rnnLayer = as<RNNSequenceLayer*> (layer);
bool permute_input = (1 != rnnLayer->axis);
/* check incoming CNN layer and setup required variables */
{
if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";
auto in_data0 = layer->insData[0].lock();
if (!in_data0)
THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name;
const auto in_dims0 = in_data0->getTensorDesc().getDims();
const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims();
/* do we have initial hidden and cell?
if blobs are not null, direct the data from them
into corresponding LSTM inputs */
auto in_data1 = layer->insData[1].lock();
if (in_data1) {
hasInitialHidden = true;
}
auto in_data2 = layer->insData[2].lock();
if (in_data2) {
hasInitialCell = true;
}
if (in_dims0.size() != 3 ||
in_data1->getTensorDesc().getDims().size() != 2 ||
in_data2->getTensorDesc().getDims().size() != 2)
THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name;
if (!permute_input) {
lstm_batch_size = in_dims0.front();
lstm_sequence_len = in_dims0[1];
} else {
lstm_batch_size = in_dims0[1];
lstm_sequence_len = in_dims0.front();
}
lstm_input_size = in_dims0.back();
lstm_hidden_size = out_dims0.back();
if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD)
THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name;
isForward = rnnLayer->direction == RNNSequenceLayer::FWD;
}
/* Prepare weight/bias memory primitives */
{
auto wLayer = as<InferenceEngine::WeightableLayer *>(layer);
auto pWeightsBlob = wLayer->_weights;
cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(4 * lstm_hidden_size), cldnn::feature(1), cldnn::spatial(lstm_input_size + lstm_hidden_size, 1));
cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()), m_defaultFormat, wTensor);
weightID = CreatePrimitiveFromBlob(topology, weightID, pWeightsBlob, WLayout);
/* create bias memory primitive */
auto pBiasBlob = wLayer->_biases;
if (pBiasBlob != nullptr) {
cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), m_defaultFormat, bTensor);
biasID = CreatePrimitiveFromBlob(topology, biasID, pBiasBlob, BLayout);
hasBias = true;
}
}
std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> input_ids_offsets;
std::vector<cldnn::primitive_id> output_ids_offsets;
cldnn::primitive_id inReshapeID = layerName + "_inReshape";
cldnn::primitive_id permuteID = layerName + "_inputReorder";
cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
// LSTM primitive works with single precision for all in/out/weights tensors
auto lstmPrecision = layer->outData[0]->getPrecision();
cldnn::tensor inputShape;
if (permute_input) {
inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 };
} else {
inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
}
cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, inputShape);
topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
topology.add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));
topology.add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
AddInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
AddInnerPrimitiveToProfiler(permuteID, layerName, layer);
AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_1", layerName, layer);
AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_2", layerName, layer);
for (int i = 0; i < lstm_sequence_len; ++i)
input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });
cldnn::primitive_id inputSplitID = layerName + "_inputSplit";
if (permute_input) {
topology.add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));
AddInnerPrimitiveToProfiler(layerName + "_inputSwap", layerName, layer);
topology.add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
} else {
topology.add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
}
AddInnerPrimitiveToProfiler(inputSplitID, layerName, layer);
cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, gemmSz);
cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : "";
std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : "";
for (int i = 0; i < lstm_sequence_len; ++i) {
std::string concatID = layerName + "_inputConcat" + get_string_id(i);
std::string lstm_fc_id = layerName + "_fully_connected" + get_string_id(i);
std::string lstm_fc_resh_id = layerName + "_gemmReshape" + get_string_id(i);
std::string lstm_fc_reor_id = layerName + "_gemmReorder" + get_string_id(i);
std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i);
std::string crop_id = layerName + "_crop" + get_string_id(i);
int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;
if (hiddenStr != "") {
topology.add(cldnn::concatenation(concatID, { inputSplitID + ":" + get_string_id(seqIdx), hiddenStr },
cldnn::concatenation::concatenation_axis::along_x));
AddInnerPrimitiveToProfiler(concatID, layerName, layer);
topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : ""));
AddInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
AddInnerPrimitiveToProfiler(inputSplitID + ":" + get_string_id(seqIdx), layerName, layer);
} else {
topology.add(cldnn::fully_connected(lstm_fc_id, inputSplitID + ":" + get_string_id(seqIdx), weightID, hasBias ? biasID : ""));
AddInnerPrimitiveToProfiler(lstm_fc_id, layerName, layer);
}
topology.add(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
topology.add(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
topology.add(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id,
cellStr, 0, 0, {}, {},
cldnn::lstm_weights_order::fizo));
AddInnerPrimitiveToProfiler(lstm_fc_resh_id, layerName, layer);
AddInnerPrimitiveToProfiler(lstm_fc_reor_id, layerName, layer);
AddInnerPrimitiveToProfiler(lstm_elt_id, layerName, layer);
hiddenStr = crop_id + ":hidden";
cellStr = crop_id + ":cell";
topology.add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
AddInnerPrimitiveToProfiler(hiddenStr, layerName, layer);
output_ids_offsets.push_back(hiddenStr);
if (i < lstm_sequence_len - 1) {
topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
AddInnerPrimitiveToProfiler(cellStr, layerName, layer);
} else {
// last hidden state crop (output 2)
if (layer->outData.size() > 1) {
cldnn::primitive_id outputHiddenID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
primitiveIDs[hiddenStr] = hiddenStr;
primitiveIDs[outputHiddenID] = hiddenStr;
}
// last cell state crop (output 3)
if (layer->outData.size() > 2) {
topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName();
AddInnerPrimitiveToProfiler(cellStr, layerName, layer);
primitiveIDs[outputCellID] = cellStr;
}
}
}
if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());
if (permute_input) {
topology.add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
AddInnerPrimitiveToProfiler(layerName + "_outputConcat", layerName, layer);
topology.add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));
} else {
topology.add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
}
primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = layerName;
AddPrimitiveToProfiler(layerName, layer);
}
// Builds the clDNN sub-graph for an LSTM sequence layer that also carries a
// sequence-length input (the path CreateRNNPrimitive selects when the layer
// has more than three inputs).
//
// Layer inputs consumed here:
//   0 - data tensor, must be 3D;
//   1, 2 - 2D tensors, reshaped and handed to cldnn::lstm_dynamic after the
//          bias id (presumably initial hidden / initial cell state - confirm
//          against the lstm_dynamic constructor);
//   3 - 1D tensor, reshaped/reordered into the "_dynLength" primitive that is
//       passed to lstm_dynamic and reverse_sequence.
//
// The single weights blob is split row by row into an input-weights ("W")
// and a recurrent-weights ("R") data primitive; the bias blob, when present,
// is copied into its own data primitive.
//
// Throws (THROW_IE_EXCEPTION) when the cell type is not LSTM, when fewer than
// four inputs are present, when any input is missing, when input ranks are
// wrong, or when the weights blob is missing.
void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
    int lstm_batch_size = 0, lstm_sequence_len = 0, lstm_input_size = 0, lstm_hidden_size = 0;
    bool hasBias = false, reverseSeq = false;
    auto inputPrimitives = GetPrevLayersPrimitives(layer);
    auto lstmPrecision = layer->outData[0]->getPrecision();
    auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(lstmPrecision));
    std::string layerName = layer_type_name_ID(layer);
    cldnn::primitive_id weightID = layerName + m_weightsTag;
    cldnn::primitive_id recurrentID = weightID + "_recurrent";
    cldnn::primitive_id biasID = layerName + m_biasesTag;
    auto rnnLayer = as<RNNSequenceLayer*>(layer);
    // Any sequence axis other than 1 requires swapping the first two dimensions.
    bool permute_input = (1 != rnnLayer->axis);
    int32_t directions = 1;

    /* check incoming CNN layer and setup required variables */
    {
        if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
            THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";

        // insData[0..3] are indexed below; do not rely on the caller's dispatch check.
        if (layer->insData.size() < 4)
            THROW_IE_EXCEPTION << "Dynamic RNN Layer " << layer->name << " requires 4 inputs";

        auto in_data0 = layer->insData[0].lock();
        if (!in_data0)
            THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name;

        const auto in_dims0 = in_data0->getTensorDesc().getDims();
        const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims();

        auto in_data1 = layer->insData[1].lock();
        auto in_data2 = layer->insData[2].lock();
        auto in_data3 = layer->insData[3].lock();
        // lock() returns an empty pointer when the referenced data is gone;
        // report it instead of dereferencing null in the rank check below.
        if (!in_data1 || !in_data2 || !in_data3)
            THROW_IE_EXCEPTION << "Missing state or sequence length input for dynamic RNN Layer " << layer->name;

        if (in_dims0.size() != 3 ||
            in_data1->getTensorDesc().getDims().size() != 2 ||
            in_data2->getTensorDesc().getDims().size() != 2 ||
            in_data3->getTensorDesc().getDims().size() != 1)
            THROW_IE_EXCEPTION << "Wrong input shapes for dynamic RNN Layer " << layer->name;

        if (!permute_input) {
            lstm_batch_size = in_dims0.front();
            lstm_sequence_len = in_dims0[1];
        } else {
            lstm_batch_size = in_dims0[1];
            lstm_sequence_len = in_dims0.front();
        }

        lstm_input_size = in_dims0.back();
        lstm_hidden_size = out_dims0.back();

        if (rnnLayer->direction == RNNSequenceLayer::BDR) {
            directions = 2;
        } else {
            reverseSeq = rnnLayer->direction == RNNSequenceLayer::BWD;
        }
    }

    /* Prepare weight/bias memory primitives - split weight blob into W and R */
    {
        const size_t WchunkSz = lstm_input_size * elementSize;
        const size_t RchunkSz = lstm_hidden_size * elementSize;

        cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
        cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), m_defaultFormat, wTensor);
        cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), m_defaultFormat, rTensor);

        auto wLayer = as<InferenceEngine::WeightableLayer *>(layer);

        {
            auto pWeightsBlob = wLayer->_weights;
            if (pWeightsBlob == nullptr)
                THROW_IE_EXCEPTION << "Missing weights for dynamic RNN Layer " << layer->name;

            auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());

            auto wmem = cldnn::memory::allocate(*m_engine, WLayout);
            auto wtmpPointer = wmem.pointer<char>();  // implicitly maps buffer - unmap in destructor

            auto rmem = cldnn::memory::allocate(*m_engine, RLayout);
            auto rtmpPointer = rmem.pointer<char>();

            auto wBytes = wtmpPointer.data();
            auto rBytes = rtmpPointer.data();

            // NOTE(review): chunk sizes use the element size of the output
            // precision, i.e. this assumes the weights blob has the same
            // precision - confirm.
            // NOTE(review): only 4 * lstm_hidden_size rows are copied although
            // the buffers are allocated with feature(directions); for the
            // bidirectional case the second half looks unfilled - confirm.
            for (int h = 0; h < 4 * lstm_hidden_size; h++) {
                // copy "input size" elements to W
                for (size_t b = 0; b < WchunkSz; b++)
                    *wBytes++ = *blobBytes++;

                // copy "lstm_hidden_size" elements to R
                for (size_t b = 0; b < RchunkSz; b++)
                    *rBytes++ = *blobBytes++;
            }

            topology.add(cldnn::data(weightID, wmem));
            topology.add(cldnn::data(recurrentID, rmem));
        }

        /* create bias memory primitive */
        auto pBiasBlob = wLayer->_biases;
        if (pBiasBlob != nullptr) {
            cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(4 * lstm_hidden_size, 1));
            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), m_defaultFormat, bTensor);

            auto bmem = cldnn::memory::allocate(*m_engine, BLayout);
            auto btmpPointer = bmem.pointer<char>();

            auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
            const size_t BchunkSz = lstm_hidden_size * elementSize;
            auto bBytes = btmpPointer.data();

            for (size_t b = 0; b < 4 * BchunkSz; b++)
                *bBytes++ = *blobBytes++;

            topology.add(cldnn::data(biasID, bmem));
            hasBias = true;
        }
    }

    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
    cldnn::primitive_id permuteID = layerName + "_inputReorder";
    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";

    cldnn::tensor inputShape;

    if (permute_input) {
        inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, directions };
    } else {
        inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, directions };
    }
    cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, directions };
    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, inputShape);
    topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
    topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout));

    AddInnerPrimitiveToProfiler(inReshapeID, layerName, layer);
    AddInnerPrimitiveToProfiler(permuteID, layerName, layer);

    topology.add(cldnn::reshape(inHiddenReshapeID + "_1", inputPrimitives[1], hiddenStateShape));
    topology.add(cldnn::reshape(inHiddenReshapeID + "_2", inputPrimitives[2], hiddenStateShape));

    AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_1", layerName, layer);
    AddInnerPrimitiveToProfiler(inHiddenReshapeID + "_2", layerName, layer);

    // Input 3 is reshaped and reordered to the LSTM precision before use.
    cldnn::primitive_id dynID = layerName + "_dynLength";
    cldnn::primitive_id dynReshapeID = layerName + "_dynReshape";
    cldnn::tensor dynShape = { 1, 1, lstm_batch_size, 1 };
    cldnn::layout dynLayout = cldnn::layout(DataTypeFromPrecision(lstmPrecision), cldnn::format::bfyx, dynShape);
    topology.add(cldnn::reshape(dynReshapeID, inputPrimitives[3], dynShape));
    topology.add(cldnn::reorder(dynID, dynReshapeID, dynLayout));

    AddInnerPrimitiveToProfiler(dynReshapeID, layerName, layer);
    AddInnerPrimitiveToProfiler(dynID, layerName, layer);

    // inputID tracks the primitive feeding the next stage; prevInputID is
    // the stage before it.
    cldnn::primitive_id inputID = permuteID;
    cldnn::primitive_id prevInputID = permuteID;

    if (permute_input) {
        inputID = layerName + "_inputSwap";
        topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
        prevInputID = inputID;
        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
    }

    if (reverseSeq) {
        inputID = layerName + "_inputReverse";
        topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
        primitivesToIRLayersMap[inputID] = { layer->name };
        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
        prevInputID = inputID;
    }

    // last hidden state crop (output 2)
    cldnn::primitive_id outputHiddenID = "", outputCellID = "";
    if (layer->outData.size() > 1) {
        outputHiddenID = layer_type_lower(layer) + ":" + layer->outData[1]->getName();
        auto last_hidden_mem = cldnn::memory::allocate(*m_engine,
            { DataTypeFromPrecision(lstmPrecision),
              cldnn::format::bfyx, { lstm_batch_size, 1, lstm_hidden_size, directions } });
        topology.add(cldnn::mutable_data(outputHiddenID, last_hidden_mem));
        primitiveIDs[outputHiddenID] = outputHiddenID;
    }

    // last cell state crop (output 3)
    if (layer->outData.size() > 2) {
        outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName();
        auto last_cell_mem = cldnn::memory::allocate(*m_engine,
            { DataTypeFromPrecision(lstmPrecision),
              cldnn::format::bfyx, { lstm_batch_size, 1, lstm_hidden_size, directions } });
        topology.add(cldnn::mutable_data(outputCellID, last_cell_mem));
        primitiveIDs[outputCellID] = outputCellID;
    }

    // main part - dLSTM primitive itself
    cldnn::primitive_id dlstmID = layerName + "_dlstm";
    // Reference the bias primitive only when it was actually added to the
    // topology above (same convention as the regular LSTM path).
    topology.add(cldnn::lstm_dynamic(dlstmID, inputID, dynID,
        weightID, recurrentID, outputHiddenID, outputCellID, hasBias ? biasID : "",
        inHiddenReshapeID + "_1", inHiddenReshapeID + "_2"));
    prevInputID = inputID = dlstmID;
    AddInnerPrimitiveToProfiler(dlstmID, layerName, layer);

    // Undo the input-side reversal / axis swap on the output.
    if (reverseSeq) {
        inputID = layerName + "_outputReverse";
        topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0));
        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
        prevInputID = inputID;
    }

    if (permute_input) {
        inputID = layerName + "_outputSwap";
        topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 }));
        AddInnerPrimitiveToProfiler(inputID, layerName, layer);
        prevInputID = inputID;
    }

    primitiveIDs[inputID] = inputID;
    primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = inputID;
    AddPrimitiveToProfiler(layerName, layer, inputID);
}
// Routes an RNN layer to the matching LSTM builder: layers with more than
// three inputs are built by CreateDynamicLSTM, all others by CreateRegularLSTM.
void Program::CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
    const bool useDynamicLstm = layer->insData.size() > 3;
    if (!useDynamicLstm) {
        CreateRegularLSTM(topology, layer);
        return;
    }
    CreateDynamicLSTM(topology, layer);
}
}; // namespace CLDNNPlugin

View File

@ -0,0 +1,206 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// This file is a plain list of REGISTER_FACTORY(<opset version>, <op name>)
// invocations. It defines nothing itself: the including translation unit must
// define REGISTER_FACTORY before including it (enforced by the #error below),
// and that definition decides what each entry expands to.
// Commented-out entries document operations that are intentionally not
// registered, together with the reason where one is known.
#ifndef REGISTER_FACTORY
#error "REGISTER_FACTORY is not defined"
#endif
// ------------------------------ Supported v0 ops ------------------------------ //
REGISTER_FACTORY(v0, Abs);
REGISTER_FACTORY(v0, Acos);
REGISTER_FACTORY(v0, Asin);
REGISTER_FACTORY(v0, Atan);
REGISTER_FACTORY(v0, Ceiling);
REGISTER_FACTORY(v0, Clamp);
REGISTER_FACTORY(v0, Concat);
REGISTER_FACTORY(v0, Constant);
REGISTER_FACTORY(v0, Convert);
REGISTER_FACTORY(v0, Cos);
REGISTER_FACTORY(v0, Cosh);
REGISTER_FACTORY(v0, CumSum);
REGISTER_FACTORY(v0, CTCGreedyDecoder);
REGISTER_FACTORY(v0, DepthToSpace);
REGISTER_FACTORY(v0, DetectionOutput);
REGISTER_FACTORY(v0, Elu);
REGISTER_FACTORY(v0, Erf);
REGISTER_FACTORY(v0, Exp);
REGISTER_FACTORY(v0, FakeQuantize);
REGISTER_FACTORY(v0, Floor);
REGISTER_FACTORY(v0, Gelu);
REGISTER_FACTORY(v0, GRN);
REGISTER_FACTORY(v0, HardSigmoid);
// REGISTER_FACTORY(v0, Interpolate); Supported via v0 -> v4 conversion
REGISTER_FACTORY(v0, Log);
REGISTER_FACTORY(v0, LRN);
REGISTER_FACTORY(v0, MatMul);
REGISTER_FACTORY(v0, MVN);
REGISTER_FACTORY(v0, Negative);
REGISTER_FACTORY(v0, NormalizeL2);
REGISTER_FACTORY(v0, Parameter);
REGISTER_FACTORY(v0, PRelu);
REGISTER_FACTORY(v0, PriorBox);
REGISTER_FACTORY(v0, PriorBoxClustered);
REGISTER_FACTORY(v0, Proposal);
REGISTER_FACTORY(v0, PSROIPooling);
REGISTER_FACTORY(v0, Relu);
REGISTER_FACTORY(v0, Result);
REGISTER_FACTORY(v0, RegionYolo);
REGISTER_FACTORY(v0, ReorgYolo);
REGISTER_FACTORY(v0, ReverseSequence);
REGISTER_FACTORY(v0, ROIPooling);
REGISTER_FACTORY(v0, Sigmoid);
REGISTER_FACTORY(v0, Sqrt);
REGISTER_FACTORY(v0, Selu);
REGISTER_FACTORY(v0, Sin);
REGISTER_FACTORY(v0, Sinh);
REGISTER_FACTORY(v0, Sign);
REGISTER_FACTORY(v0, SquaredDifference);
REGISTER_FACTORY(v0, SpaceToDepth);
REGISTER_FACTORY(v0, Squeeze);
REGISTER_FACTORY(v0, ShuffleChannels);
REGISTER_FACTORY(v0, Tan);
REGISTER_FACTORY(v0, Tanh);
REGISTER_FACTORY(v0, Tile);
REGISTER_FACTORY(v0, Unsqueeze);
// ----------------------------- Unsupported v0 ops ----------------------------- //
// Deprecated ops
// REGISTER_FACTORY(v0, Add);
// REGISTER_FACTORY(v0, Divide);
// REGISTER_FACTORY(v0, Greater);
// REGISTER_FACTORY(v0, GreaterEq);
// REGISTER_FACTORY(v0, Less);
// REGISTER_FACTORY(v0, LessEq);
// REGISTER_FACTORY(v0, LSTMSequence);
// REGISTER_FACTORY(v0, LSTMCell);
// REGISTER_FACTORY(v0, Maximum);
// REGISTER_FACTORY(v0, Minimum);
// REGISTER_FACTORY(v0, Multiply);
// REGISTER_FACTORY(v0, NotEqual);
// REGISTER_FACTORY(v0, Power);
// REGISTER_FACTORY(v0, Quantize);
// REGISTER_FACTORY(v0, Select);
// REGISTER_FACTORY(v0, Subtract);
// REGISTER_FACTORY(v0, Xor); // Not marked as deprecated yet, but removed from new opsets
// REGISTER_FACTORY(v0, BatchNormInference);
// REGISTER_FACTORY(v0, Range);
// REGISTER_FACTORY(v0, RNNCell);
// REGISTER_FACTORY(v0, ShapeOf);
// REGISTER_FACTORY(v0, TensorIterator);
// ------------------------------ Supported v1 ops ------------------------------ //
REGISTER_FACTORY(v1, Add);
REGISTER_FACTORY(v1, AvgPool);
REGISTER_FACTORY(v1, BatchToSpace);
REGISTER_FACTORY(v1, BinaryConvolution);
REGISTER_FACTORY(v1, Broadcast);
REGISTER_FACTORY(v1, ConvertLike);
REGISTER_FACTORY(v1, Convolution);
REGISTER_FACTORY(v1, ConvolutionBackpropData);
REGISTER_FACTORY(v1, DeformableConvolution);
REGISTER_FACTORY(v1, DeformablePSROIPooling);
REGISTER_FACTORY(v1, Divide);
REGISTER_FACTORY(v1, Equal);
REGISTER_FACTORY(v1, FloorMod);
REGISTER_FACTORY(v1, Gather);
REGISTER_FACTORY(v1, GatherTree);
REGISTER_FACTORY(v1, Greater);
REGISTER_FACTORY(v1, GreaterEqual);
REGISTER_FACTORY(v1, GroupConvolution);
REGISTER_FACTORY(v1, GroupConvolutionBackpropData);
REGISTER_FACTORY(v1, Less);
REGISTER_FACTORY(v1, LessEqual);
REGISTER_FACTORY(v1, LogicalAnd);
REGISTER_FACTORY(v1, LogicalNot);
REGISTER_FACTORY(v1, LogicalOr);
REGISTER_FACTORY(v1, LogicalXor);
REGISTER_FACTORY(v1, MaxPool);
REGISTER_FACTORY(v1, Maximum);
REGISTER_FACTORY(v1, Minimum);
REGISTER_FACTORY(v1, Multiply);
REGISTER_FACTORY(v1, NotEqual);
// REGISTER_FACTORY(v1, NonMaxSuppression); Supported via v1 -> v5 internal conversion
REGISTER_FACTORY(v1, OneHot);
REGISTER_FACTORY(v1, Pad);
REGISTER_FACTORY(v1, Power);
REGISTER_FACTORY(v1, ReduceMax);
REGISTER_FACTORY(v1, ReduceLogicalAnd);
REGISTER_FACTORY(v1, ReduceLogicalOr);
REGISTER_FACTORY(v1, ReduceMean);
REGISTER_FACTORY(v1, ReduceMin);
REGISTER_FACTORY(v1, ReduceProd);
REGISTER_FACTORY(v1, ReduceSum);
REGISTER_FACTORY(v1, Reshape);
REGISTER_FACTORY(v1, Subtract);
REGISTER_FACTORY(v1, SpaceToBatch);
REGISTER_FACTORY(v1, Softmax);
REGISTER_FACTORY(v1, StridedSlice);
REGISTER_FACTORY(v1, Select);
REGISTER_FACTORY(v1, Split);
REGISTER_FACTORY(v1, Transpose);
REGISTER_FACTORY(v1, TopK);
REGISTER_FACTORY(v1, VariadicSplit);
REGISTER_FACTORY(v1, Mod);
// ----------------------------- Unsupported v1 ops ----------------------------- //
// REGISTER_FACTORY(v1, Reverse);
// ------------------------------ Supported v3 ops ------------------------------ //
REGISTER_FACTORY(v3, Asinh);
REGISTER_FACTORY(v3, Acosh);
REGISTER_FACTORY(v3, Atanh);
REGISTER_FACTORY(v3, Broadcast);
REGISTER_FACTORY(v3, EmbeddingBagOffsetsSum);
REGISTER_FACTORY(v3, EmbeddingBagPackedSum);
REGISTER_FACTORY(v3, EmbeddingSegmentsSum);
REGISTER_FACTORY(v3, ExtractImagePatches);
// REGISTER_FACTORY(v3, NonMaxSuppression); Supported via v3 -> v5 internal conversion
// ----------------------------- Unsupported v3 ops ----------------------------- //
// REGISTER_FACTORY(v3, ScatterUpdate); // There is the scatter_update primitive, but seems like it produces wrong results
// REGISTER_FACTORY(v3, Assign);
// REGISTER_FACTORY(v3, Bucketize);
// REGISTER_FACTORY(v3, GRUCell);
// REGISTER_FACTORY(v3, NonZero);
// REGISTER_FACTORY(v3, ROIAlign);
// REGISTER_FACTORY(v3, ReadValue);
// REGISTER_FACTORY(v3, ScatterElementsUpdate);
// REGISTER_FACTORY(v3, ScatterUpdate); // NOTE(review): duplicate of the ScatterUpdate entry at the top of this list
// REGISTER_FACTORY(v3, ScatterNDUpdate);
// REGISTER_FACTORY(v3, ShapeOf);
// REGISTER_FACTORY(v3, TopK);
// ------------------------------ Supported v4 ops ------------------------------ //
REGISTER_FACTORY(v4, HSwish);
REGISTER_FACTORY(v4, Interpolate);
REGISTER_FACTORY(v4, LSTMCell);
REGISTER_FACTORY(v4, Mish);
// REGISTER_FACTORY(v4, NonMaxSuppression); Supported via v4 -> v5 internal conversion
REGISTER_FACTORY(v4, Proposal);
REGISTER_FACTORY(v4, ReduceL1);
REGISTER_FACTORY(v4, ReduceL2);
REGISTER_FACTORY(v4, SoftPlus);
REGISTER_FACTORY(v4, Swish);
// ----------------------------- Unsupported v4 ops ----------------------------- //
// REGISTER_FACTORY(v4, CTCLoss);
// REGISTER_FACTORY(v4, Range);
// ------------------------------ Supported v5 ops ------------------------------ //
REGISTER_FACTORY(v5, HSigmoid);
REGISTER_FACTORY(v5, LogSoftmax);
REGISTER_FACTORY(v5, LSTMSequence);
// REGISTER_FACTORY(v5, NonMaxSuppression); Supported via conversion to an internal op
// (presumably NonMaxSuppressionIEInternal, registered below - TODO confirm)
REGISTER_FACTORY(v5, Round);
// ----------------------------- Unsupported v5 ops ----------------------------- //
// REGISTER_FACTORY(v5, BatchNormInference);
// REGISTER_FACTORY(v5, GatherND);
// REGISTER_FACTORY(v5, GRUSequence);
// REGISTER_FACTORY(v5, Loop);
// REGISTER_FACTORY(v5, RNNSequence);
// --------------------------- Supported internal ops --------------------------- //
REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal);

File diff suppressed because it is too large Load Diff

View File

@ -6,65 +6,49 @@
#include <vector>
#include <map>
#include <set>
#include <memory>
#include <string>
#include <utility>
#include <algorithm>
#include <cstdint>
#include <cpp/ie_cnn_network.h>
#include <legacy/ie_layers.h>
#include <cpp_interfaces/exception2status.hpp>
#include <ie_blob.h>
#include "details/ie_exception.hpp"
#include "debug_options.h"
#include "cldnn_custom_layer.h"
#include "cldnn_config.h"
#include <api/engine.hpp>
#include <api/memory.hpp>
#include <api/topology.hpp>
#include <api/primitive.hpp>
#include <api/softmax.hpp>
#include <api/resample.hpp>
#include <api/pooling.hpp>
#include <api/eltwise.hpp>
#include <api/concatenation.hpp>
#include <api/detection_output.hpp>
// Forward declarations for cldnn part
namespace cldnn {
enum class activation_func;
struct activation_additional_params;
enum class reduce_mode : uint16_t;
enum class eltwise_mode : int32_t;
} // namespace cldnn
// Forward declarations for ngraph part
namespace ngraph {
class Node;
class DiscreteTypeInfo;
} // namespace ngraph
// Defines a free function named __register_<op_name>_<op_version>() that
// registers a factory for ngraph::op::<op_version>::<op_name> through
// Program::RegisterFactory.
// The registered lambda dynamic-casts the incoming ngraph::Node to that
// concrete op type, throws via THROW_IE_EXCEPTION when the cast fails, and
// otherwise forwards to Create<op_name>Op(p, op_casted), which must be
// visible at the point where the macro is expanded.
// NOTE(review): __PRETTY_FUNCTION__ is a GCC/Clang extension - confirm a
// fallback is defined for other compilers.
// NOTE(review): the generated function name starts with a double underscore,
// which C++ reserves for the implementation.
#define REGISTER_FACTORY_IMPL(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version() { \
Program::RegisterFactory<ngraph::op::op_version::op_name>( \
[](Program& p, const std::shared_ptr<ngraph::Node>& op) { \
auto op_casted = std::dynamic_pointer_cast<ngraph::op::op_version::op_name>(op); \
if (!op_casted) \
THROW_IE_EXCEPTION << "Invalid ngraph Node type passed into " << __PRETTY_FUNCTION__; \
Create##op_name##Op(p, op_casted); \
}); \
}
namespace CLDNNPlugin {
// Non-throwing downcast: dynamic_casts the layer owned by in_ptr to
// LayerTypePtr and returns the result of that cast unchanged.
template<typename LayerTypePtr>
LayerTypePtr tryAs(const InferenceEngine::CNNLayerPtr& in_ptr) {
    auto* raw_layer = in_ptr.get();
    return dynamic_cast<LayerTypePtr>(raw_layer);
}
// Checked downcast: dynamic_casts the layer owned by in_ptr to LayerTypePtr
// and throws via THROW_IE_EXCEPTION when the cast yields nullptr.
template<typename LayerTypePtr>
LayerTypePtr as(const InferenceEngine::CNNLayerPtr& in_ptr) {
    LayerTypePtr casted_layer = dynamic_cast<LayerTypePtr>(in_ptr.get());
    const bool cast_failed = (casted_layer == nullptr);
    if (cast_failed) {
        THROW_IE_EXCEPTION << "CNNLayerPtr is not suitable for casting to requested layer type";
    }
    return casted_layer;
}
// Returns a copy of layer->type with every character passed through
// std::tolower (each character is handled as unsigned char).
inline std::string layer_type_lower(const InferenceEngine::CNNLayer* layer) {
    std::string lowered = layer->type;
    for (char& ch : lowered) {
        const unsigned char in_ch = static_cast<unsigned char>(ch);
        const unsigned char out_ch = static_cast<unsigned char>(std::tolower(in_ch));
        ch = out_ch;
    }
    return lowered;
}
// Builds the identifier "<lower-cased layer type>:<layer name>".
inline std::string layer_type_name_ID(const InferenceEngine::CNNLayer* layer) {
    std::string id = layer_type_lower(layer);
    id += ":";
    id += layer->name;
    return id;
}
// Shared-pointer convenience overload: forwards the owned layer to the
// raw-pointer layer_type_lower.
inline std::string layer_type_lower(InferenceEngine::CNNLayerPtr layer) {
    const InferenceEngine::CNNLayer* raw_layer = layer.get();
    return layer_type_lower(raw_layer);
}
// Shared-pointer convenience overload: forwards the owned layer to the
// raw-pointer layer_type_name_ID.
inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) {
    const InferenceEngine::CNNLayer* raw_layer = layer.get();
    return layer_type_name_ID(raw_layer);
}
// Overloads of the naming helpers that take an ngraph node, in raw-pointer
// and shared_ptr form. They are only declared here; their definitions are
// not in the visible part of this header.
std::string layer_type_lower(const ngraph::Node* op);
std::string layer_type_name_ID(const ngraph::Node* op);
std::string layer_type_lower(const std::shared_ptr<ngraph::Node>& op);
std::string layer_type_name_ID(const std::shared_ptr<ngraph::Node>& op);
struct PerfCounter {
InferenceEngine::InferenceEngineProfileInfo::LayerStatus status;
@ -85,8 +69,14 @@ public:
class Program {
public:
Program(InferenceEngine::CNNNetwork &network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
std::shared_ptr<cldnn::program> getCompiledProgram(int program_id = 0);
Program(InferenceEngine::CNNNetwork& network, std::shared_ptr<const cldnn::engine> engine, const Config& config);
Program() : m_config({}), m_engine(nullptr), m_curBatch(-1), queryMode(false) {}
static const cldnn::primitive_id m_preProcessTag;
static const cldnn::primitive_id m_meanValuesTag;
static const cldnn::primitive_id m_workaroundTag;
static const cldnn::primitive_id m_preCustomLayerTag;
static const cldnn::primitive_id m_postCustomLayerTag;
std::map<std::string, cldnn::primitive_id> primitiveIDs;
std::map<cldnn::primitive_id, std::vector<std::string>> primitivesToIRLayersMap;
@ -103,298 +93,82 @@ public:
int m_max_batch;
int m_curBatch;
InferenceEngine::OutputsDataMap p_currentOutputs;
std::vector<cldnn::primitive_id> GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const;
const std::map<std::string, cldnn::layout>& getInputLayouts() const { return inputLayouts; }
std::shared_ptr<cldnn::program> GetCompiledProgram(int program_id = 0);
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return inputLayouts; }
InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_networkInputs; }
InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_networkOutputs; }
const cldnn::engine& GetEngine() const { return *m_engine; }
const Config& GetConfig() const { return m_config; }
int GetMaxBatchSizeForSingleProgram();
void AddPrimitiveToProfiler(cldnn::primitive_id id, const InferenceEngine::CNNLayerPtr &layer,
cldnn::primitive_id customOutputId = "");
void AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
const InferenceEngine::CNNLayerPtr &layer);
// internal types
enum LayerType {
Convolution,
DeformableConvolution,
ReLU,
ReLU6,
Sigmoid,
TanH,
ELU,
Activation,
Exp,
Asin,
Atan,
Acos,
Abs,
Asinh,
Acosh,
Atanh,
Not,
LRN,
Pooling,
FullyConnected,
SoftMax,
LogSoftmax,
Power,
Split,
VariadicSplit,
Concatenate,
Eltwise,
SimplerNMS,
ROIPooling,
Crop,
Deconvolution,
PriorBox,
DetectionOutput,
Normalize,
Reshape,
Transpose,
Permute,
Flatten,
BatchNormalization,
PReLU,
ScaleShift,
Proposal,
PSROIPooling,
Clamp,
Copy,
Resample,
Interp,
Interpolate,
RegionYolo,
ReorgYolo,
ConstantBlob,
ArgMax,
ArgMin,
MVN,
Unpooling,
Tile,
Pad,
LSTMCell,
RNN,
Gather,
DepthToSpace,
SpaceToDepth,
BatchToSpace,
SpaceToBatch,
ShuffleChannels,
StridedSlice,
Broadcast,
ReverseSequence,
BinaryConvolution,
Quantize,
Squeeze,
Unsqueeze,
Reduce,
TopK,
Floor,
Ceil,
Ceiling,
Erf,
HardSigmoid,
HSigmoid,
Log,
Neg,
Reciprocal,
Selu,
Sign,
SoftPlus,
SoftSign,
Swish,
HSwish,
Mish,
Gelu,
Sin,
Sinh,
Cos,
Cosh,
Tan,
Gemm,
OneHot,
Convert,
ConvertLike,
GatherTree,
ExperimentalDetectronROIFeatureExtractor,
NonMaxSuppression,
Select,
GRN,
CTCGreedyDecoder,
PriorBoxClustered,
CumSum,
Round,
EmbeddingBagPackedSum,
EmbeddingBagOffsetsSum,
EmbeddingSegmentsSum,
ExtractImagePatches,
NO_TYPE
};
using GenericBlobMap = std::map<cldnn::primitive_id, cldnn::primitive_id>;
static LayerType LayerTypeFromStr(const std::string& str);
private:
std::vector<std::shared_ptr<cldnn::program>> m_programs;
std::shared_ptr<const cldnn::engine> m_engine;
Config m_config;
std::shared_ptr<cldnn::program> BuildProgram(InferenceEngine::CNNNetwork &network);
bool IsOpSupported(const InferenceEngine::CNNNetwork& network, const std::shared_ptr<ngraph::Node>& op);
// Profiling utils
void InitProfileInfo(const std::string& layerName,
const std::string& layerType,
bool isCPU = false,
InferenceEngine::InferenceEngineProfileInfo::LayerStatus status
= InferenceEngine::InferenceEngineProfileInfo::EXECUTED,
std::string parentId = "");
void AddPrimitiveToProfiler(cldnn::primitive_id id, const std::shared_ptr<ngraph::Node>& op,
cldnn::primitive_id customOutputId = "");
void AddPrimitiveToProfiler(const std::shared_ptr<ngraph::Node>& op,
cldnn::primitive_id customOutputId = "");
void AddInnerPrimitiveToProfiler(cldnn::primitive_id id, cldnn::primitive_id parentId,
const std::shared_ptr<ngraph::Node>& op);
static const cldnn::primitive_id m_preProcessTag;
static const cldnn::primitive_id m_weightsTag;
static const cldnn::primitive_id m_biasesTag;
static const cldnn::primitive_id m_meanValuesTag;
static const cldnn::primitive_id m_postProcessTag;
static const cldnn::primitive_id m_scalesTag;
static const cldnn::primitive_id m_workaroundTag;
static const cldnn::primitive_id m_preCustomLayerTag;
static const cldnn::primitive_id m_postCustomLayerTag;
// Graph construction helpers
void ValidateInputs(const std::shared_ptr<ngraph::Node>& op, std::vector<size_t> validInputsCount);
std::vector<cldnn::primitive_id> GetInputPrimitiveIDs(const std::shared_ptr<ngraph::Node>& op) const;
using factory_t = std::function<void(Program&, const std::shared_ptr<ngraph::Node>&)>;
using factories_map_t = std::map<ngraph::DiscreteTypeInfo, factory_t>;
enum WeightRearrangeType {
BroadcastFeatures,
FlipDeconvDims,
NO_REARRANGE
};
cldnn::format m_defaultFormat;
void InitFormat(InferenceEngine::ICNNNetwork &network);
static cldnn::resample_type ResampleTypeFromString(const std::string &str);
void Load(InferenceEngine::ICNNNetwork &network);
static cldnn::pooling_mode PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding = false);
static cldnn::eltwise_mode EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op);
static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str);
static cldnn::softmax::dimension_t SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer);
cldnn::primitive_id CreatePrimitiveFromBlob(cldnn::topology& topology,
cldnn::primitive_id primID,
const InferenceEngine::Blob::Ptr pBlob,
const cldnn::layout& blobLayout,
size_t blobByteOffset = 0,
WeightRearrangeType rearrange = NO_REARRANGE);
void CreateWeightAndBiasPrimitives(cldnn::topology& topology,
const InferenceEngine::CNNLayerPtr& layer,
std::vector<cldnn::primitive_id>& weightsPrimID,
std::vector<cldnn::primitive_id>& biasesPrimID);
void CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology,
const InferenceEngine::CNNLayerPtr& layer,
std::vector<cldnn::primitive_id>& weightsPrimID,
std::vector<cldnn::primitive_id>& biasesPrimID);
void CreateScaleWeightsAndBiasesFromBN(cldnn::topology& topology,
const InferenceEngine::BatchNormalizationLayer* bnLayer,
cldnn::primitive_id& weightsPrimID,
cldnn::primitive_id& biasesPrimID);
void AddPreProcessPrimitive(InferenceEngine::InputInfo::Ptr inputInfo);
void AddInputPrimitive(cldnn::topology& topology,
InferenceEngine::InputInfo::Ptr inputInfo, InferenceEngine::Precision inputPrecision, const std::string inputName);
void AddOutputPrimitive(cldnn::topology& topology,
std::string outputName, const InferenceEngine::DataPtr outputData,
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::UNSPECIFIED);
void CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
bool IsValidSplitConvMerge(const InferenceEngine::SplitLayer* splitLayer) const;
bool CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const;
static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::DataPtr data);
static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::CNNLayerPtr layer);
static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::DataPtr data);
static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer);
void AddSingleValuePrimitive(cldnn::topology& topology, cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value);
GenericBlobMap CreateGenericLayerBlobPrimitives(cldnn::topology& topology, const InferenceEngine::GenericLayer* layer);
static void ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames);
static bool HasParam(const std::map<std::string, std::string>& layerParams, std::string paramName) {
auto p = layerParams.find(paramName);
return p != layerParams.end();
template<typename OpType, typename std::enable_if<std::is_base_of<ngraph::Node, OpType>::value, int>::type = 0>
static void RegisterFactory(factory_t func) {
Program::factories_map.insert({OpType::type_info, func});
}
void changeInputBatch(int batch);
template<typename PType>
void AddPrimitive(PType prim) {
if (m_topology == nullptr) {
THROW_IE_EXCEPTION << "m_topology object was not created in clDNNPlugin::Program";
}
// Layer Primitive Creators
void CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBatchNormalizationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer);
void CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateNormalizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateDetectionOutputPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateConcatenatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSplitPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateFusedSplitConvMergePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, bool useGroups = true);
void CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSoftMaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateLogSoftmaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateFullyConnectedPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateLRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateActivationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
void CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateDeformableConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateInterpPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateInterpolatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type);
void CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateMaxUnpoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, CLDNNCustomLayerPtr customLayer);
void CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSpaceToDepthPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBatchToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSpaceToBatchPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer);
void CreateReverseSequencePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateBinaryConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateGatherTreePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateConvertPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateConvertLikePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreatePyramidRoIAlignPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateNonMaxSuppressionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
void CreateSelectPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateGRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateCTCGreedyDecoderPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreatePriorBoxClusteredPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateCumSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateRoundPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagPackedSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingBagOffsetsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateEmbeddingSegmentsSumPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
void CreateExtractImagePatchesPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
m_topology->add(prim);
}
private:
static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs;
std::shared_ptr<const cldnn::engine> m_engine;
Config m_config;
std::shared_ptr<cldnn::topology> m_topology;
InferenceEngine::InputsDataMap m_networkInputs;
InferenceEngine::OutputsDataMap m_networkOutputs;
bool queryMode;
void EnableQueryMode() { queryMode = true; }
void DisableQueryMode() { queryMode = false; }
void PrepareBuild(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs);
void CleanupBuild();
std::shared_ptr<cldnn::program> BuildProgram(std::vector<std::shared_ptr<ngraph::Node>> ops,
InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs);
void CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr<ngraph::Node>& op);
bool CanProcessDynBatch(std::vector<std::shared_ptr<ngraph::Node>> ops, InferenceEngine::InputsDataMap networkInputs) const;
void ChangeInputBatch(int batch);
};
// Free helper declarations taking the Program being built and the ngraph node to translate.
// Only the declarations are visible here; the notes below are inferred from names and
// signatures and should be confirmed against the definitions.

// NOTE(review): presumably emits the primitive for `node` using the user-supplied `customLayer` description - confirm.
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& node, CLDNNCustomLayerPtr customLayer);
// NOTE(review): presumably maps a single-input elementwise node onto a cldnn activation
// with function `func` and its additional `params` - confirm.
void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node,
cldnn::activation_func func, cldnn::activation_additional_params params);
// NOTE(review): presumably maps an elementwise node onto a cldnn eltwise primitive operating in `mode` - confirm.
void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& node, cldnn::eltwise_mode mode);
// NOTE(review): name suggests this reports whether `node` lies on a path fed only by constants - confirm.
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node);
} // namespace CLDNNPlugin

View File

@ -28,7 +28,7 @@
namespace CLDNNPlugin {
class CLDNNRemoteAllocator;
class CLDNNRemoteBlobImpl : public gpu::details::param_map_obj_getter {
class CLDNNRemoteBlobImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
friend class CLDNNRemoteAllocator;
public:
enum BlobType {
@ -40,24 +40,24 @@ public:
BT_DX_BUF_SHARED,
};
explicit CLDNNRemoteBlobImpl(gpu::ClContext::Ptr context,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane = 0,
BlobType mem_type = BT_BUF_INTERNAL);
explicit CLDNNRemoteBlobImpl(InferenceEngine::gpu::ClContext::Ptr context,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane = 0,
BlobType mem_type = BT_BUF_INTERNAL);
void allocate() noexcept;
bool deallocate() noexcept;
ParamMap getParams() const;
InferenceEngine::ParamMap getParams() const;
std::string getDeviceName() const noexcept;
std::shared_ptr<RemoteContext> getContext() const noexcept;
LockedMemory<void> buffer() noexcept;
LockedMemory<const void> cbuffer() const noexcept;
LockedMemory<void> rwmap()noexcept;
LockedMemory<const void> rmap() const noexcept;
LockedMemory<void> wmap()noexcept;
const std::shared_ptr<IAllocator> &getAllocator() const noexcept;
std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept;
InferenceEngine::LockedMemory<void> buffer() noexcept;
InferenceEngine::LockedMemory<const void> cbuffer() const noexcept;
InferenceEngine::LockedMemory<void> rwmap()noexcept;
InferenceEngine::LockedMemory<const void> rmap() const noexcept;
InferenceEngine::LockedMemory<void> wmap()noexcept;
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept;
void *getHandle() const noexcept { return _handle; }
bool is_allocated() const noexcept;
@ -67,7 +67,7 @@ public:
protected:
static CLDNNRemoteAllocator m_allocator;
std::weak_ptr<gpu::ClContext> m_context;
std::weak_ptr<InferenceEngine::gpu::ClContext> m_context;
// constructor stuff
cldnn::shared_handle m_mem;
@ -81,10 +81,10 @@ protected:
mutable std::unique_ptr<cldnn::pointer<uint8_t>> lockedHolder;
mutable void* _handle;
mutable std::shared_ptr<IAllocator> _allocator;
mutable std::shared_ptr<InferenceEngine::IAllocator> _allocator;
void lock() const;
void unlock() const;
void lock() const;
void unlock() const;
};
template<typename TpublicAPI>
@ -92,45 +92,44 @@ class typedCLDNNRemoteBlob : public TpublicAPI {
public:
using Ptr = std::shared_ptr<typedCLDNNRemoteBlob>;
explicit typedCLDNNRemoteBlob(gpu::ClContext::Ptr context,
const TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane,
CLDNNRemoteBlobImpl::BlobType mem_type)
: _impl(context, layout, mem,
surf,
plane, mem_type), TpublicAPI(desc) {}
explicit typedCLDNNRemoteBlob(InferenceEngine::gpu::ClContext::Ptr context,
const InferenceEngine::TensorDesc& desc,
const cldnn::layout& layout,
cldnn::shared_handle mem,
cldnn::shared_surface surf,
uint32_t plane,
CLDNNRemoteBlobImpl::BlobType mem_type)
: _impl(context, layout, mem, surf, plane, mem_type)
, TpublicAPI(desc) {}
void allocate() noexcept override { _impl.allocate(); }
bool deallocate() noexcept override { return _impl.deallocate(); }
ParamMap getParams() const override { return _impl.getParams(); }
InferenceEngine::ParamMap getParams() const override { return _impl.getParams(); }
std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
std::shared_ptr<RemoteContext> getContext() const noexcept override { return _impl.getContext(); }
LockedMemory<void> buffer() noexcept override { return _impl.buffer(); }
LockedMemory<const void> cbuffer() const noexcept override { return _impl.cbuffer(); }
LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
LockedMemory<void> wmap()noexcept override { return _impl.wmap(); }
std::shared_ptr<InferenceEngine::RemoteContext> getContext() const noexcept override { return _impl.getContext(); }
InferenceEngine::LockedMemory<void> buffer() noexcept override { return _impl.buffer(); }
InferenceEngine::LockedMemory<const void> cbuffer() const noexcept override { return _impl.cbuffer(); }
InferenceEngine::LockedMemory<void> rwmap() noexcept override { return _impl.rwmap(); }
InferenceEngine::LockedMemory<const void> rmap() const noexcept override { return _impl.rmap(); }
InferenceEngine::LockedMemory<void> wmap()noexcept override { return _impl.wmap(); }
CLDNNRemoteBlobImpl* getImpl() { return &_impl; }
protected:
const std::shared_ptr<IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
const std::shared_ptr<InferenceEngine::IAllocator> &getAllocator() const noexcept override { return _impl.getAllocator(); }
void *getHandle() const noexcept override { return _impl.getHandle(); }
CLDNNRemoteBlobImpl _impl;
};
using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob<gpu::ClBufferBlob>;
using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob<gpu::ClImage2DBlob>;
using CLDNNRemoteCLbuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClBufferBlob>;
using CLDNNRemoteCLImage2D = typedCLDNNRemoteBlob<InferenceEngine::gpu::ClImage2DBlob>;
#ifdef WIN32
using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob<gpu::D3DBufferBlob>;
using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob<gpu::D3DSurface2DBlob>;
using CLDNNRemoteD3DBuffer = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DBufferBlob>;
using CLDNNRemoteD3DSurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::D3DSurface2DBlob>;
#else
using CLDNNRemoteVASurface = typedCLDNNRemoteBlob<gpu::VASurfaceBlob>;
using CLDNNRemoteVASurface = typedCLDNNRemoteBlob<InferenceEngine::gpu::VASurfaceBlob>;
#endif
inline CLDNNRemoteBlobImpl* getBlobImpl(gpu::ClBlob* blobPtr) {
inline CLDNNRemoteBlobImpl* getBlobImpl(InferenceEngine::gpu::ClBlob* blobPtr) {
#ifdef WIN32
{
auto ptr = blobPtr->as<CLDNNRemoteD3DSurface>();
@ -157,7 +156,7 @@ inline CLDNNRemoteBlobImpl* getBlobImpl(gpu::ClBlob* blobPtr) {
return nullptr;
}
class CLDNNRemoteAllocator : public IAllocator {
class CLDNNRemoteAllocator : public InferenceEngine::IAllocator {
protected:
friend class CLDNNRemoteBlobImpl;
std::atomic_flag _lock;
@ -181,13 +180,13 @@ public:
* @brief Maps handle to heap memory accessible by any memory manipulation routines.
* @return Generic pointer to memory
*/
void* lock(void* handle, LockOp = LOCK_FOR_WRITE) noexcept override { return nullptr; };
void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { return nullptr; };
/**
* @brief Unmaps memory by handle with multiple sequential mappings of the same handle.
* The multiple sequential mappings of the same handle are supposed to get the same
* result while there isn't a ref counter supported.
*/
void unlock(void* handle) noexcept override;
void unlock(void* handle) noexcept override;
/**
* @brief Allocates memory
* @param size The size in bytes to allocate
@ -198,12 +197,12 @@ public:
* @brief Releases handle and all associated memory resources which invalidates the handle.
* @return false if handle cannot be released, otherwise - true.
*/
bool free(void* handle) noexcept override { return true; }
bool free(void* handle) noexcept override { return true; }
void Release() noexcept override {}
};
class CLDNNExecutionContextImpl : public gpu::details::param_map_obj_getter {
class CLDNNExecutionContextImpl : public InferenceEngine::gpu::details::param_map_obj_getter {
public:
enum ContextType {
OCL,
@ -213,17 +212,17 @@ public:
using Ptr = std::shared_ptr<CLDNNExecutionContextImpl>;
using CPtr = std::shared_ptr<const CLDNNExecutionContextImpl>;
explicit CLDNNExecutionContextImpl(std::shared_ptr<IInferencePlugin> plugin,
const ParamMap& params,
const Config& config = {});
explicit CLDNNExecutionContextImpl(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {});
ParamMap getParams() const;
InferenceEngine::ParamMap getParams() const;
std::string getDeviceName() const noexcept;
std::shared_ptr<cldnn::engine> GetEngine() const { return m_engine; }
Config& GetConfig() { return m_config; }
ContextType GetType() const { return m_type; }
const std::weak_ptr<IInferencePlugin> GetPlugin() const { return m_plugin; }
const std::weak_ptr<InferenceEngine::IInferencePlugin> GetPlugin() const { return m_plugin; }
void acquire_lock() {
while (lock.test_and_set(std::memory_order_acquire)) {}
@ -235,11 +234,11 @@ public:
protected:
std::shared_ptr<cldnn::engine> m_engine;
gpu_handle_param m_va_display;
InferenceEngine::gpu_handle_param m_va_display;
Config m_config;
ContextType m_type;
std::weak_ptr<IInferencePlugin> m_plugin;
std::weak_ptr<InferenceEngine::IInferencePlugin> m_plugin;
std::atomic_flag lock;
};
@ -263,18 +262,19 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
#else
using surf_key = _Key<cldnn::shared_surface, uint32_t>;
#endif
std::map<surf_key, RemoteBlob::Ptr> shared_surf_reg;
std::map<cldnn::shared_handle, RemoteBlob::Ptr> shared_obj_reg;
std::map<surf_key, InferenceEngine::RemoteBlob::Ptr> shared_surf_reg;
std::map<cldnn::shared_handle, InferenceEngine::RemoteBlob::Ptr> shared_obj_reg;
RemoteBlob::Ptr reuse_surf(const TensorDesc& tensorDesc,
const ParamMap& params) {
RemoteBlob::Ptr ret = nullptr;
uint32_t plane = gpu::details::param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
InferenceEngine::RemoteBlob::Ptr reuse_surf(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params) {
using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter;
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
uint32_t plane = param_map_obj_getter::_ObjFromParamSimple<uint32_t>(params, GPU_PARAM_KEY(VA_PLANE));
#ifdef WIN32
cldnn::shared_handle mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
cldnn::shared_handle mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
surf_key skey(mem, plane);
#else
cldnn::shared_surface surf = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_surface>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
cldnn::shared_surface surf = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_surface>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
surf_key skey(surf, plane);
#endif
_impl.acquire_lock();
@ -289,7 +289,7 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
ImageFormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
auto smart_this =
std::dynamic_pointer_cast<gpu::ClContext>
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
#ifdef WIN32
ret = std::make_shared<CLDNNRemoteD3DSurface>(smart_this,
@ -307,10 +307,10 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
return ret;
}
RemoteBlob::Ptr reuse_obj(const TensorDesc& tensorDesc,
cldnn::shared_handle mem,
CLDNNRemoteBlobImpl::BlobType blob_type) {
RemoteBlob::Ptr ret = nullptr;
InferenceEngine::RemoteBlob::Ptr reuse_obj(const InferenceEngine::TensorDesc& tensorDesc,
cldnn::shared_handle mem,
CLDNNRemoteBlobImpl::BlobType blob_type) {
InferenceEngine::RemoteBlob::Ptr ret = nullptr;
_impl.acquire_lock();
@ -321,26 +321,23 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
} else {
// unluckily, not found - create new and insert into registry
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
auto smart_this =
std::dynamic_pointer_cast<gpu::ClContext>
std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
switch (blob_type) {
case CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<CLDNNRemoteCLbuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
break;
case CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED:
layout.format = ImageFormatFromLayout(tensorDesc.getLayout());
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this,
tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<CLDNNRemoteCLImage2D>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#ifdef WIN32
case CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED:
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this,
tensorDesc, layout, mem, 0, 0, blob_type);
ret = std::make_shared<CLDNNRemoteD3DBuffer>(smart_this, tensorDesc, layout, mem, 0, 0, blob_type);
break;
#endif
default:
@ -353,17 +350,17 @@ class typedCLDNNExecutionContext : public TpublicContextAPI,
return ret;
}
RemoteBlob::Ptr create_buffer(const TensorDesc& tensorDesc) {
InferenceEngine::RemoteBlob::Ptr create_buffer(const InferenceEngine::TensorDesc& tensorDesc) {
cldnn::layout layout(DataTypeFromPrecision(tensorDesc.getPrecision()),
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<gpu::ClContext>
FormatFromLayout(tensorDesc.getLayout()),
CldnnTensorFromIEDims(tensorDesc.getDims()));
auto smart_this = std::dynamic_pointer_cast<InferenceEngine::gpu::ClContext>
(std::enable_shared_from_this<typedCLDNNExecutionContext<TpublicContextAPI>>::shared_from_this());
return std::make_shared<CLDNNRemoteCLbuffer>(smart_this,
tensorDesc,
layout,
nullptr, 0, 0,
CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
tensorDesc,
layout,
nullptr, 0, 0,
CLDNNRemoteBlobImpl::BlobType::BT_BUF_INTERNAL);
}
void check_if_shared() {
@ -374,21 +371,23 @@ public:
using Ptr = std::shared_ptr<typedCLDNNExecutionContext>;
using CPtr = std::shared_ptr<const typedCLDNNExecutionContext>;
explicit typedCLDNNExecutionContext(std::shared_ptr<IInferencePlugin> plugin,
const ParamMap& params,
const Config& config = {})
explicit typedCLDNNExecutionContext(std::shared_ptr<InferenceEngine::IInferencePlugin> plugin,
const InferenceEngine::ParamMap& params,
const Config& config = {})
: _impl(plugin, params, config) {}
ParamMap getParams() const noexcept override { return _impl.getParams(); }
InferenceEngine::ParamMap getParams() const noexcept override { return _impl.getParams(); }
std::string getDeviceName() const noexcept override { return _impl.getDeviceName(); }
RemoteBlob::Ptr CreateBlob(const TensorDesc& tensorDesc, const ParamMap& params = {}) override {
InferenceEngine::RemoteBlob::Ptr CreateBlob(const InferenceEngine::TensorDesc& tensorDesc, const InferenceEngine::ParamMap& params = {}) override {
using namespace InferenceEngine;
using InferenceEngine::gpu::details::param_map_obj_getter;
if (params.empty()) {
// user wants clDNN to allocate blob by itself and return handle
return create_buffer(tensorDesc);
} else {
// user will supply shared object handle
std::string memTypeStr = gpu::details::param_map_obj_getter::_StrFromParams(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));
std::string memTypeStr = param_map_obj_getter::_StrFromParams(params, GPU_PARAM_KEY(SHARED_MEM_TYPE));
if (GPU_PARAM_VALUE(VA_SURFACE) == memTypeStr) {
check_if_shared();
@ -399,14 +398,14 @@ public:
if (GPU_PARAM_VALUE(OCL_BUFFER) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_BUF_SHARED;
mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
} else if (GPU_PARAM_VALUE(OCL_IMAGE2D) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_IMG_SHARED;
mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(MEM_HANDLE));
#ifdef WIN32
} else if (GPU_PARAM_VALUE(DX_BUFFER) == memTypeStr) {
blob_type = CLDNNRemoteBlobImpl::BlobType::BT_DX_BUF_SHARED;
mem = gpu::details::param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
mem = param_map_obj_getter::_ObjFromParamSimple<cldnn::shared_handle>(params, GPU_PARAM_KEY(DEV_OBJECT_HANDLE));
check_if_shared();
#endif
} else {
@ -426,14 +425,14 @@ protected:
CLDNNExecutionContextImpl _impl;
};
using CLDNNRemoteCLContext = typedCLDNNExecutionContext<gpu::ClContext>;
using CLDNNRemoteCLContext = typedCLDNNExecutionContext<InferenceEngine::gpu::ClContext>;
#ifdef WIN32
using CLDNNRemoteD3DContext = typedCLDNNExecutionContext<gpu::D3DContext>;
using CLDNNRemoteD3DContext = typedCLDNNExecutionContext<InferenceEngine::gpu::D3DContext>;
#else
using CLDNNRemoteVAContext = typedCLDNNExecutionContext<gpu::VAContext>;
using CLDNNRemoteVAContext = typedCLDNNExecutionContext<InferenceEngine::gpu::VAContext>;
#endif
inline CLDNNExecutionContextImpl* getContextImpl(gpu::ClContext::Ptr ctxPtr) {
inline CLDNNExecutionContextImpl* getContextImpl(InferenceEngine::gpu::ClContext::Ptr ctxPtr) {
#ifdef WIN32
{
auto ptr = ctxPtr->as<CLDNNRemoteD3DContext>();

View File

@ -1,326 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <iostream>
#include <iomanip>
#ifndef NDEBUG
#include <algorithm>
#include <cmath>
#include <cstring>
#endif
#include "debug_options.h"
namespace CLDNNPlugin {
// Sets every debug switch from the compile-time macro that controls it.
// Each member is true (or, for the print size, large) only when its macro is defined.
DebugOptions::DebugOptions() {
    // _DEBUG_LAYER_CONTENT toggles m_bDebugLayerContent.
#ifdef _DEBUG_LAYER_CONTENT
    m_bDebugLayerContent = true;
#else
    m_bDebugLayerContent = false;
#endif

    // _DEBUG_LAYER_CONTENT_INDEXED toggles m_bDebugLayerContentIndexed.
#ifdef _DEBUG_LAYER_CONTENT_INDEXED
    m_bDebugLayerContentIndexed = true;
#else
    m_bDebugLayerContentIndexed = false;
#endif

    // _DEBUG_LAYER_FORMAT toggles m_bDebugLayerFormat.
#ifdef _DEBUG_LAYER_FORMAT
    m_bDebugLayerFormat = true;
#else
    m_bDebugLayerFormat = false;
#endif

    // _PLUGIN_PERF_PRINTS toggles m_bPluginPerfPrints.
#ifdef _PLUGIN_PERF_PRINTS
    m_bPluginPerfPrints = true;
#else
    m_bPluginPerfPrints = false;
#endif

    // _DEBUG_LAYER_CONTENT_FULL raises the print limit from 3 to 1000000000.
#ifdef _DEBUG_LAYER_CONTENT_FULL
    m_maxPrintSize = 1000000000;
#else
    m_maxPrintSize = 3;
#endif
}
// Writes the current value of every debug option to stdout.
// Compiled to a no-op when NDEBUG is defined.
void DebugOptions::PrintOptions() const {
#ifndef NDEBUG
    // One chained expression; every line still ends with std::endl so the
    // stream is flushed after each option exactly as before.
    std::cout << "Debug Options:" << std::endl
              << "\tDebug Layer Content: " << m_bDebugLayerContent << std::endl
              << "\tDebug Layer Content Indexed: " << m_bDebugLayerContentIndexed << std::endl
              << "\tDebug Layers Format: " << m_bDebugLayerFormat << std::endl
              << "\tPlugin Performance Prints: " << m_bPluginPerfPrints << std::endl
              << "\tPrint Size: " << m_maxPrintSize << std::endl;
#endif  // NDEBUG
}
// Returns the textual name of a cldnn format.
// Only yxfb, byxf, bfyx and fyxb are recognised; anything else yields "Unknown Format".
std::string DebugOptions::GetFormatName(cldnn::format::type format) {
    const char* label = "Unknown Format";
    switch (format) {
    case cldnn::format::yxfb:
        label = "yxfb";
        break;
    case cldnn::format::byxf:
        label = "byxf";
        break;
    case cldnn::format::bfyx:
        label = "bfyx";
        break;
    case cldnn::format::fyxb:
        label = "fyxb";
        break;
    default:
        break;
    }
    return label;
}
// Returns the textual name of a cldnn data type.
// Only f16 and f32 are recognised; anything else yields "Unknown Data Type".
std::string DebugOptions::GetDataTypeName(cldnn::data_types dataType) {
    const char* label = "Unknown Data Type";
    switch (dataType) {
    case cldnn::data_types::f16:
        label = "f16";
        break;
    case cldnn::data_types::f32:
        label = "f32";
        break;
    default:
        break;
    }
    return label;
}
// Prints the element count of `input` followed by its leading values to stdout.
// At most m_maxPrintSize values are shown, and nothing is printed unless
// m_bDebugLayerContent is set. Compiled to a no-op when NDEBUG is defined.
void DebugOptions::PrintInput(const InferenceEngine::TBlob<float>& input) const {
#ifndef NDEBUG
    // The read-only pointer is obtained before the option check, as in the original.
    const float* values = input.readOnly();
    if (!m_bDebugLayerContent) {
        return;
    }

    std::cout << "Input (" << input.size() << ") = ";
    const size_t shown = std::min<size_t>(m_maxPrintSize, input.size());
    for (size_t pos = 0; pos < shown; ++pos) {
        std::cout << values[pos] << ", ";
    }
    std::cout << std::endl;
#endif  // NDEBUG
}
// Converts a raw IEEE half-precision bit pattern to float for debug printing.
//
// @param u16val  the 16 bits of the half value (1 sign, 5 exponent, 10 mantissa bits)
// @return the decoded value, with magnitudes below 1e-4 clamped to 0; always 0 when
//         NDEBUG is defined (the conversion is only compiled into debug builds).
//
// This is a deliberately simple conversion: an all-ones half exponent is mapped to an
// all-ones float exponent, and every other exponent is just re-biased. Half zeros and
// denormals (exponent field 0) therefore decode to tiny normal floats below 2^-14,
// which the 1e-4 clamp turns into 0.
float DebugOptions::SimpleConvertFP16toFP32(uint16_t u16val) {
#ifndef NDEBUG
    // convert to fp32 (1,5,10)->(1,8,23)
    const uint32_t sign = (u16val & 0x8000U) << 16;
    const uint32_t mantissa = (u16val & 0x3FFU) << 13;
    const uint32_t exp_val_f16 = (u16val & 0x7C00U) >> 10;
    const uint32_t exp = (exp_val_f16 == 0x1FU ? 0xFFU : exp_val_f16 + 127 - 15) << 23;
    const uint32_t bits = sign | exp | mantissa;

    // Copy the bit pattern instead of dereferencing a reinterpret_cast'ed pointer:
    // reading a uint32_t object through a float lvalue violates strict aliasing.
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
    float fval;
    std::memcpy(&fval, &bits, sizeof(fval));

    return (std::fabs(fval) < 1e-4f) ? 0.0f : fval;  // clamp epsilon fp16 to 0
#else
    (void)u16val;  // conversion is debug-only
    return 0;
#endif  // NDEBUG
}
// Prints the single element of `mem` addressed by `index` to stdout.
// f32 memory is read directly and f16 memory goes through SimpleConvertFP16toFP32;
// any other data type trips an assert. With m_bDebugLayerContentIndexed set, the
// value is prefixed by its raw index; otherwise only the value and ", " are printed.
// Compiled to a no-op when NDEBUG is defined.
void DebugOptions::PrintIndexedValue(const cldnn::memory& mem, const cldnn::tensor index) const {
#ifndef NDEBUG
    auto layout = mem.get_layout();
    float value;
    switch (layout.data_type) {
    case cldnn::data_types::f32: {
        // Keep the pointer object alive only while the element is read.
        auto mapped = mem.pointer<float>();
        value = mapped.data()[CalcLinearIndex(layout, index)];
        break;
    }
    case cldnn::data_types::f16: {
        auto mapped = mem.pointer<uint16_t>();
        value = SimpleConvertFP16toFP32(mapped.data()[CalcLinearIndex(layout, index)]);
        break;
    }
    default:
        assert(0);  // unhandled data type
        value = 0.0f;
    }

    if (!m_bDebugLayerContentIndexed) {
        std::cout << value << ", ";
        return;
    }

    std::cout << "\t[";
    for (size_t dim = 0; dim < index.raw.size(); ++dim) {
        std::cout << index.raw[dim] << ",";
    }
    std::cout << "] = " << value << "\n";
#endif  // NDEBUG
}
// Computes the linear element offset of `index` inside a buffer laid out as `memLayout`.
// Only the yxfb and bfyx formats are handled; any other format trips an assert and
// yields 0. Always returns 0 when NDEBUG is defined.
uint32_t DebugOptions::CalcLinearIndex(const cldnn::layout& memLayout, const cldnn::tensor index) {
#ifndef NDEBUG
    switch (memLayout.format) {
    case cldnn::format::yxfb: {
        // Pitches are accumulated starting from the batch dimension.
        const uint32_t batchPitch = 1;
        const uint32_t featurePitch = memLayout.size.batch[0] * batchPitch;
        const uint32_t xPitch = memLayout.size.feature[0] * featurePitch;
        const uint32_t yPitch = memLayout.size.spatial[1] * xPitch;
        return (index.batch[0] * batchPitch)
            + (index.feature[0] * featurePitch)
            + (index.spatial[1] * xPitch)
            + (index.spatial[0] * yPitch);
    }
    case cldnn::format::bfyx: {
        // Pitches are accumulated starting from the x dimension.
        const uint32_t xPitch = 1;
        const uint32_t yPitch = memLayout.size.spatial[1] * xPitch;
        const uint32_t featurePitch = memLayout.size.spatial[0] * yPitch;
        const uint32_t batchPitch = memLayout.size.feature[0] * featurePitch;
        return (index.batch[0] * batchPitch)
            + (index.feature[0] * featurePitch)
            + (index.spatial[1] * xPitch)
            + (index.spatial[0] * yPitch);
    }
    default:
        assert(0);
        return 0;
    }
#endif  // NDEBUG
    return 0;
}
// Debug-only: walks every network output and, depending on the debug flags, prints its layout
// description (data type, format, sizes) and/or dumps its content through DumpSingleOutput.
// Does nothing when neither m_bDebugLayerContent nor m_bDebugLayerFormat is set, and is
// compiled out when NDEBUG is defined.
void DebugOptions::PrintNetworkOutputs(std::map<cldnn::primitive_id, cldnn::network_output>& outputsMap) const {
#ifndef NDEBUG
    if (!m_bDebugLayerContent && !m_bDebugLayerFormat) {
        return;
    }

    for (auto& layer : outputsMap) {
        std::cout << layer.first << ":\n";
        auto mem = layer.second.get_memory();
        auto layout = mem.get_layout();
        if (m_bDebugLayerFormat) {
            // Resolve the names once and reuse them (they used to be computed twice,
            // with the first results discarded).
            const std::string formatName = GetFormatName(layout.format);
            const std::string datatypeName = GetDataTypeName(layout.data_type);
            std::cout << " Layout: ( " << datatypeName << ", " << formatName << ", [";
            for (auto s : layout.size.sizes()) {
                std::cout << s << ",";
            }
            std::cout << "] )\n";
        }
        if (m_bDebugLayerContent) {
            DumpSingleOutput(layer.first, outputsMap);
            std::cout << "\n";
        }
    }
#endif  // NDEBUG
}
// Debug-only: prints the layout (data type, format, sizes, lower/upper padding) of the output
// called `name`, followed by its raw content. Only the bfyx format is supported for the content
// dump; any other format asserts and returns. When `bSingleFeatureMap` is true only
// spatial[1] * spatial[0] elements are dumped, otherwise the whole padded buffer
// (product of the first four padded sizes). Compiled out when NDEBUG is defined.
void DebugOptions::DumpSingleOutput(cldnn::primitive_id name, std::map<cldnn::primitive_id, cldnn::network_output>& outputs, bool bSingleFeatureMap) const {
#ifndef NDEBUG
    auto found = outputs.find(name);
    if (found == outputs.end()) {
        std::cout << "Couldn't find output: " << name << std::endl;
        return;
    }

    auto output = found->second;
    std::cout << name << ":\n";
    auto mem = output.get_memory();
    auto layout = mem.get_layout();
    cldnn::tensor lowerPad = layout.data_padding.lower_size();
    cldnn::tensor upperPad = layout.data_padding.upper_size();

    {  // format
        std::cout << " Layout: ( " <<
            GetDataTypeName(layout.data_type) << ", " <<
            GetFormatName(layout.format) << ", [";
        for (auto s : layout.size.sizes()) {
            std::cout << s << ",";
        }
        std::cout << "] [";
        for (auto pad : lowerPad.sizes()) {
            std::cout << pad << ",";
        }
        std::cout << "] [";
        for (auto pad : upperPad.sizes()) {
            std::cout << pad << ",";
        }
        std::cout << "] )\n";
    }

    {  // content
        switch (layout.format) {
        case cldnn::format::bfyx:
        {
            size_t elements = 1;
            if (bSingleFeatureMap) {
                elements = layout.size.spatial[1] * layout.size.spatial[0];
            } else {
                for (int i = 0; i < 4; i++) {
                    elements *= layout.size.sizes()[i] + lowerPad.sizes()[i] + upperPad.sizes()[i];
                }
            }

            // The pitches only control where DumpElementsRaw inserts line breaks.
            std::vector<size_t> pitches;
            pitches.push_back(layout.size.spatial[0] + lowerPad.spatial[0] + upperPad.spatial[0]);  // x or width - rowpitch
            pitches.push_back(pitches[0] * (layout.size.spatial[1] + lowerPad.spatial[1] + upperPad.spatial[1]));  // slice pitch
            // Feature pitch is slice pitch times the feature count; the previous expression
            // multiplied by the row pitch once more.
            pitches.push_back(pitches[1] * layout.size.feature[0]);  // depth/feature pitch

            if (layout.data_type == cldnn::data_types::f32)
                DumpElementsRaw<float>(mem, pitches, elements);
            else
                DumpElementsRaw<uint16_t>(mem, pitches, elements);
            break;
        }
        default:
            assert(0);  // unhandled format
            return;
        }
        std::cout << "\n";
    }
#endif  // NDEBUG
}
// Records the current steady-clock time under `eventName` and remembers which event its
// duration is measured from (`startingAt`; the event itself when `startingAt` is empty).
// Only active when _PLUGIN_PERF_PRINTS is defined.
void DebugOptions::AddTimedEvent(std::string eventName, std::string startingAt) {
#ifdef _PLUGIN_PERF_PRINTS
    m_TimedEventTimestamp[eventName] = std::chrono::steady_clock::now();
    m_TimedEventStart[eventName] = startingAt.empty() ? eventName : startingAt;
#endif  // _PLUGIN_PERF_PRINTS
}
// Prints, for every recorded event whose reference event differs from itself, the time in
// milliseconds elapsed between the reference event and the event.
// Only active when _PLUGIN_PERF_PRINTS is defined.
void DebugOptions::PrintTimedEvents() {
#ifdef _PLUGIN_PERF_PRINTS
    using FractionalMs = std::chrono::duration<double, std::chrono::milliseconds::period>;

    for (auto& entry : m_TimedEventStart) {
        const std::string& eventName = entry.first;
        const std::string& referenceName = entry.second;
        if (eventName == referenceName) {
            continue;  // an event measured from itself has no duration to report
        }
        const auto elapsed = m_TimedEventTimestamp[eventName] - m_TimedEventTimestamp[referenceName];
        std::cout << "[Plugin Internal Metric]: \t" << eventName << " took: "
                  << std::chrono::duration_cast<FractionalMs>(elapsed).count() << " ms\n";
    }
#endif  // _PLUGIN_PERF_PRINTS
}
// Forgets every recorded timed event (timestamps and reference-event links).
// Only active when _PLUGIN_PERF_PRINTS is defined.
void DebugOptions::ClearTimedEvents() {
#ifdef _PLUGIN_PERF_PRINTS
    m_TimedEventTimestamp.clear();
    m_TimedEventStart.clear();
#endif  // _PLUGIN_PERF_PRINTS
}
// Marks the workaround called `name` as active. No-op when NDEBUG is defined.
void DebugOptions::EnableWA(std::string name) {
#ifndef NDEBUG
    m_workaroundNames.emplace(name);
#endif  // NDEBUG
}
// Marks the workaround called `name` as inactive (nothing happens if it was not active).
// No-op when NDEBUG is defined.
void DebugOptions::DisableWA(std::string name) {
#ifndef NDEBUG
    const auto position = m_workaroundNames.find(name);
    if (position != m_workaroundNames.end()) {
        m_workaroundNames.erase(position);
    }
#endif  // NDEBUG
}
// Tells whether the workaround called `name` is currently active.
// Always false when NDEBUG is defined.
bool DebugOptions::IsWAActive(std::string name) {
#ifdef NDEBUG
    return false;
#else
    return m_workaroundNames.count(name) != 0;
#endif  // NDEBUG
}
}; // namespace CLDNNPlugin

View File

@ -1,85 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <iostream>
#include <iomanip>
#include <string>
#include <set>
#include <map>
#include <algorithm>
#include "cpp/ie_cnn_network.h"
#include <api/memory.hpp>
#include <api/primitive.hpp>
#include <api/network.hpp>
// Debugging options flags
// #define _DEBUG_LAYER_CONTENT
// #define _DEBUG_LAYER_CONTENT_FULL
// #define _DEBUG_LAYER_FORMAT
// #define _PLUGIN_PERF_PRINTS
namespace CLDNNPlugin {
// Collection of debugging helpers for the clDNN plugin: printing of inputs and network outputs,
// simple named-event timing and a registry of named workarounds.
// Most helpers have bodies guarded by NDEBUG, i.e. they do nothing in release builds.
class DebugOptions {
public:
// Enables content printing in PrintInput / PrintNetworkOutputs.
bool m_bDebugLayerContent;
// Makes PrintIndexedValue print the raw coordinates next to each value.
bool m_bDebugLayerContentIndexed;
// Enables layout (data type, format, sizes) printing in PrintNetworkOutputs.
bool m_bDebugLayerFormat;
// NOTE(review): presumably mirrors _PLUGIN_PERF_PRINTS; its use is not visible here - confirm.
bool m_bPluginPerfPrints;
// Upper bound on the number of values PrintInput prints.
cldnn::tensor::value_type m_maxPrintSize;
DebugOptions();
void PrintOptions() const;
// Human-readable names used when printing layouts.
static std::string GetFormatName(cldnn::format::type format);
static std::string GetDataTypeName(cldnn::data_types dataType);
void PrintInput(const InferenceEngine::TBlob<float>& input) const;
void PrintIndexedValue(const cldnn::memory& mem, const cldnn::tensor index) const;
// Linear element offset of `index` inside a buffer with layout `memLayout`.
static uint32_t CalcLinearIndex(const cldnn::layout& memLayout, const cldnn::tensor index);
void PrintNetworkOutputs(std::map<cldnn::primitive_id, cldnn::network_output>& outputsMap) const;
void DumpSingleOutput(cldnn::primitive_id name, std::map<cldnn::primitive_id, cldnn::network_output>& outputs, bool bSingleFeatureMap = false)const;
// the functions below will work in release unlike the rest
// NOTE(review): the timed-event functions are guarded by _PLUGIN_PERF_PRINTS and the WA
// functions by NDEBUG in their definitions - confirm the statement above is still accurate.
void AddTimedEvent(std::string eventName, std::string startingAt = std::string());
void PrintTimedEvents();
void ClearTimedEvents();
void EnableWA(std::string name);
void DisableWA(std::string name);
bool IsWAActive(std::string name);
protected:
// Event name -> time point at which the event was recorded.
std::map<std::string, std::chrono::steady_clock::time_point> m_TimedEventTimestamp;
// Event name -> name of the event its duration is measured from.
std::map<std::string, std::string> m_TimedEventStart;
// Names of the currently enabled workarounds.
std::set<std::string> m_workaroundNames;
static float SimpleConvertFP16toFP32(uint16_t u16val);
// Prints up to `numElements` elements of `mem` (capped by the layout's element count),
// interpreted as T, as a comma-separated list with 10 digits of precision.
// A line break is emitted after every element whose 1-based position is a multiple of one of
// the `pitches` (one break per matching pitch). f32 layouts print the element as is; any other
// layout data type converts the element with cldnn::half_to_float.
// The body is compiled out when NDEBUG is defined.
template <typename T>
static void DumpElementsRaw(cldnn::memory& mem, const std::vector<size_t>& pitches, size_t numElements) {
#ifndef NDEBUG
auto layout = mem.get_layout();
auto ptr = mem.pointer<T>();
auto data = ptr.data(); // +offset;
// never read past the number of elements the layout reports
auto elements = std::min(layout.count(), numElements);
for (size_t i = 0; i < elements;) {
// size_t linearAddress = ... // todo calc linear with pitches
std::cout << std::setprecision(10)
<< ((layout.data_type == cldnn::data_types::f32) ? data[i] : cldnn::half_to_float(uint16_t(data[i])))
<< ", ";
// advance before the pitch test so that breaks fall after complete rows/slices
i++;
for (auto& pitch : pitches) {
if ((i % pitch) == 0) {
std::cout << std::endl;
}
}
}
#endif // NDEBUG
}
};
}; // namespace CLDNNPlugin

View File

@ -0,0 +1,53 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/batch_to_space.hpp"
#include "ngraph/op/constant.hpp"
#include "api/batch_to_space.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v1::BatchToSpace operation into a cldnn::batch_to_space primitive.
// Inputs 1..3 (block shape, crops begin, crops end) must be Constant nodes; each is converted to
// a cldnn::tensor, padded up to the rank of the data input with 1 (block shape) or 0 (crops).
// Throws if any of those inputs is not a Constant.
void CreateBatchToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v1::BatchToSpace>& op) {
    p.ValidateInputs(op, {4});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto rank = op->get_input_shape(0).size();
    auto format = DefaultFormatForDims(rank);

    // Order: block_shape, crops_begin, crops_end
    std::vector<cldnn::tensor> constArgs;
    constArgs.reserve(3);

    for (size_t inputIdx = 1; inputIdx < 4; ++inputIdx) {
        auto constNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(inputIdx));
        if (!constNode)
            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";

        std::vector<int32_t> sizes = constNode->cast_vector<int32_t>();
        // Neutral element: 1 for the block shape, 0 for the crops.
        int32_t default_size = (inputIdx == 1) ? 1 : 0;
        if (sizes.size() < rank) {
            sizes.resize(rank, default_size);
        }
        constArgs.emplace_back(format, sizes, default_size);
    }

    auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));

    p.AddPrimitive(cldnn::batch_to_space(layerName,
                                         inputPrimitives[0],  // input
                                         constArgs[0],        // block_shape
                                         constArgs[1],        // crops_begin
                                         constArgs[2],        // crops_end
                                         out_size));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, BatchToSpace);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,107 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "api/broadcast.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
namespace CLDNNPlugin {
// Shared lowering for Broadcast operations: emits a cldnn::broadcast primitive producing the
// output shape of `op`. When input and output ranks differ, the input is first reordered (only
// if the default format for the two ranks differs) and reshaped to the output rank by inserting
// unit dimensions - in front of the shape when `axis_mapping` is empty, otherwise at the
// positions not listed in `axis_mapping`.
static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr<ngraph::Node>& op, const ngraph::AxisSet axis_mapping) {
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto inputShape = op->get_input_shape(0);
    auto outputShape = op->get_output_shape(0);
    const auto inputRank = inputShape.size();
    const auto outputRank = outputShape.size();

    auto broadcastInput = inputPrimitives[0];

    if (inputRank != outputRank) {
        // Add reorder if changing number of dimensions requires changing format
        auto targetFormat = DefaultFormatForDims(outputRank);
        if (targetFormat.value != DefaultFormatForDims(inputRank).value) {
            auto reorderName = layerName + "_cldnn_in_reorder";
            auto targetDatatype = DataTypeFromPrecision(op->get_input_element_type(0));
            p.AddPrimitive(cldnn::reorder(reorderName, broadcastInput, targetFormat, targetDatatype));
            p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);
            broadcastInput = reorderName;
        }

        auto reshapeName = layerName + "_cldnn_in_reshape";

        // Extend input dimensions with ones
        if (axis_mapping.empty()) {
            // No mapping given: prepend the necessary number of 1-s
            inputShape.insert(inputShape.begin(), outputRank - inputRank, 1ul);
        } else {
            // Mapping given: every gap between consecutive mapped axes is filled with 1-s
            ngraph::Shape expandedShape;
            int previousAxis = -1;
            size_t filledRank = 0;
            for (auto& axis : axis_mapping) {
                const int currentAxis = static_cast<int>(axis);
                int ones_count = std::max(currentAxis - previousAxis - 1, 0);

                expandedShape.insert(expandedShape.begin() + filledRank, ones_count, 1ul);
                expandedShape.push_back(outputShape[axis]);

                filledRank += ones_count + 1;
                previousAxis = currentAxis;
            }
            inputShape = expandedShape;
        }

        auto targetShape = CldnnTensorFromIEDims(inputShape);
        p.AddPrimitive(cldnn::reshape(reshapeName, broadcastInput, targetShape));
        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);
        broadcastInput = reshapeName;
    }

    p.AddPrimitive(cldnn::broadcast(layerName,
                                    broadcastInput,
                                    CldnnTensorFromIEDims(op->get_output_shape(0))));
    p.AddPrimitiveToProfiler(op);
}
// Lowers an nGraph v1::Broadcast. With explicit broadcasting (AutoBroadcastType::NONE) and a
// third input present, the axes mapping is read from that input, which must be a Constant
// (throws otherwise); in every other case the input shape is simply prepended with ones.
void CreateBroadcastOp(Program& p, const std::shared_ptr<ngraph::op::v1::Broadcast>& op) {
    p.ValidateInputs(op, {2, 3});

    const bool hasExplicitMapping =
        op->get_broadcast_spec().m_type == ngraph::op::AutoBroadcastType::NONE && op->get_input_size() == 3;
    if (!hasExplicitMapping) {
        // TODO: check if axis_mapping is not needed in these cases and prepending input shape with ones works fine in all cases
        CreateCommonBroadcastOp(p, op, {});
        return;
    }

    auto mappingConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
    if (!mappingConst)
        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";

    CreateCommonBroadcastOp(p, op, mappingConst->get_axis_set_val());
}
// Lowers an nGraph v3::Broadcast.
// CreateCommonBroadcastOp expects the axes *mapping* (output positions of the input dimensions).
// get_broadcast_axes() reports the complementary set - the axes being broadcast - so it must not
// be forwarded as a mapping. Instead, when the optional third input (axes mapping, used by the
// explicit mode) is present it is read from its Constant node (throws if it is not a Constant);
// otherwise an empty mapping is passed and the input shape is prepended with ones.
void CreateBroadcastOp(Program& p, const std::shared_ptr<ngraph::op::v3::Broadcast>& op) {
    p.ValidateInputs(op, {2, 3});

    ngraph::AxisSet axis_mapping;
    if (op->get_input_size() == 3) {
        auto axis_mapping_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
        if (!axis_mapping_node)
            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";

        axis_mapping = axis_mapping_node->get_axis_set_val();
    }
    CreateCommonBroadcastOp(p, op, axis_mapping);
}
REGISTER_FACTORY_IMPL(v1, Broadcast);
REGISTER_FACTORY_IMPL(v3, Broadcast);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,56 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/concat.hpp"
#include "api/concatenation.hpp"
namespace CLDNNPlugin {
// Maps an IE/nGraph concatenation axis of a tensor with `rank` dimensions to the matching clDNN
// concatenation axis.
// A negative `axis` counts from the last dimension (-1 is the last one); previously a negative
// value was always rejected because of the signed/unsigned comparison against `rank`.
// Throws when the (normalized) axis is outside [0, rank) or has no clDNN counterpart.
static cldnn::concatenation::concatenation_axis GetConcatAxis(int32_t axis, size_t rank) {
    int64_t normalized_axis = axis;
    if (normalized_axis < 0)
        normalized_axis += static_cast<int64_t>(rank);
    if (normalized_axis < 0 || normalized_axis >= static_cast<int64_t>(rank))
        THROW_IE_EXCEPTION << "Concatenation axis exceeds number of dimensions";

    // Difference in dimension ordering between IE and clDNN,
    // reverse spatial dimensions after batch and feature.
    unsigned cldnn_axis = static_cast<unsigned>(normalized_axis);
    if (normalized_axis >= 2) {
        auto spatial_axis = normalized_axis - 2;
        // Default and minimum number of dimensions is 4
        auto spatial_size = std::max<size_t>(rank, 4) - 2;
        cldnn_axis = static_cast<unsigned>(spatial_size - spatial_axis - 1 + 2);
    }

    switch (cldnn_axis) {
        case 0: return cldnn::concatenation::concatenation_axis::along_b;
        case 1: return cldnn::concatenation::concatenation_axis::along_f;
        case 2: return cldnn::concatenation::concatenation_axis::along_x;
        case 3: return cldnn::concatenation::concatenation_axis::along_y;
        case 4: return cldnn::concatenation::concatenation_axis::along_z;
        case 5: return cldnn::concatenation::concatenation_axis::along_w;
        default: THROW_IE_EXCEPTION << "Unsupported concatenation axis: " << axis;
    }

    return cldnn::concatenation::concatenation_axis::along_f;  // shouldn't get here
}
// Lowers an nGraph v0::Concat into a cldnn::concatenation over all of its inputs, using the
// rank of the first input to translate the axis and the output element type as data type.
void CreateConcatOp(Program& p, const std::shared_ptr<ngraph::op::v0::Concat>& op) {
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    const auto concatAxis = GetConcatAxis(op->get_axis(), op->get_input_shape(0).size());
    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));

    p.AddPrimitive(cldnn::concatenation(layerName, inputIds, concatAxis, outputType));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, Concat);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,190 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/binary_convolution.hpp"
#include "ngraph/op/deformable_convolution.hpp"
#include "ngraph/op/group_conv.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/squared_difference.hpp"
#include "ngraph/op/gather.hpp"
#include "ngraph/op/split.hpp"
#include "ngraph/op/variadic_split.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "api/data.hpp"
namespace CLDNNPlugin {
// Describes how a Constant is consumed, as far as weights handling is concerned.
// Filled positionally by getConstProperties(), so the member order must not change.
struct ConstProperties {
// The constant feeds a convolution-like operation.
bool isWeights;
// The weights carry a leading group dimension.
bool hasGroupDimension;
// The weights are stored input-channels first (io instead of oi), as for deconvolutions.
bool reversedChannelsOrder;
};
// Inspects the consumers of the constant `op` and reports, for the first convolution-like
// consumer found, whether the weights have a group dimension and/or a reversed (io) channel
// order. Returns all-false when no such consumer exists.
static ConstProperties getConstProperties(const std::shared_ptr<ngraph::op::Constant>& op) {
    for (size_t outputIdx = 0; outputIdx < op->get_output_size(); ++outputIdx) {
        auto consumers = op->get_output_target_inputs(outputIdx);
        for (auto& consumerInput : consumers) {
            auto consumer = consumerInput.get_node();

            if (dynamic_cast<ngraph::op::v1::Convolution*>(consumer))
                return {true, false, false};

            if (dynamic_cast<ngraph::op::v1::BinaryConvolution*>(consumer))
                return {true, false, false};

            if (auto deformable = dynamic_cast<ngraph::op::v1::DeformableConvolution*>(consumer))
                return {true, deformable->get_group() > 1, false};

            if (dynamic_cast<ngraph::op::v1::GroupConvolution*>(consumer))
                return {true, true, false};

            if (dynamic_cast<ngraph::op::v1::ConvolutionBackpropData*>(consumer))
                return {true, false, true};

            if (dynamic_cast<ngraph::op::v1::GroupConvolutionBackpropData*>(consumer))
                return {true, true, true};
        }
    }
    return {false, false, false};
}
// Lowers an nGraph v0::Constant into a cldnn::data primitive.
//  * The shape (rank 0..6) is converted to a cldnn::tensor with reversed spatial dimensions;
//    other ranks throw.
//  * 1D constants consumed by Concat along axis 0, by all-1D binary elementwise operations or by
//    Gather/Split/VariadicSplit are interpreted along the batch dimension.
//  * Constants consumed by convolution-like operations get a weights format (oiyx, goiyx, ioyx, ...).
//  * A shape containing a zero dimension is replaced by a single-element tensor.
//  * Buffers are cached in p.blobMemCache by source data pointer, so constants sharing the same
//    data reuse one primitive.
void CreateConstantOp(Program& p, const std::shared_ptr<ngraph::op::v0::Constant>& op) {
    auto constDims = op->get_shape();
    cldnn::tensor constTensor;
    switch (constDims.size()) {
    case 6: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
                                        TensorValue(constDims[5]), TensorValue(constDims[4]),
                                        TensorValue(constDims[3]), TensorValue(constDims[2]));
        break;
    case 5: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
                                        TensorValue(constDims[4]), TensorValue(constDims[3]), TensorValue(constDims[2]));
        break;
    case 4: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
                                        TensorValue(constDims[3]), TensorValue(constDims[2]));
        break;
    case 3: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]),
                                        1, TensorValue(constDims[2]));
        break;
    case 2: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), 1, 1);
        break;
    case 1: constTensor = cldnn::tensor(1, TensorValue(constDims[0]), 1, 1);
        break;
    case 0: constTensor = cldnn::tensor(1, 1, 1, 1);
        break;
    default: THROW_IE_EXCEPTION << "Invalid constant blob dimensions";
    }

    // WA to inconsistency between input and const 1d tensors
    // For Concat along batch we go with batch interpretation
    // For Gather input we go with batch interpretation
    bool needsBatchInterpretation = false;
    if (constDims.size() == 1) {
        for (size_t i = 0; i < op->get_output_size(); i++) {
            auto outTensors = op->get_output_target_inputs(i);

            for (auto& t : outTensors) {
                auto outOp = t.get_node();
                if (auto castedOp = dynamic_cast<ngraph::op::v0::Concat*>(outOp)) {
                    if (castedOp->get_axis() == 0) {
                        needsBatchInterpretation = true;
                        break;
                    }
                } else if (ngraph::op::is_binary_elementwise_arithmetic(outOp) ||
                           ngraph::op::is_binary_elementwise_logical(outOp) ||
                           ngraph::is_type<ngraph::op::v0::SquaredDifference>(outOp)) {
                    bool all_inputs_1d = true;
                    for (size_t j = 0; j < outOp->get_input_size(); j++) {
                        auto& in_shape = outOp->get_input_shape(j);
                        if (in_shape.size() != 1)
                            all_inputs_1d = false;
                    }
                    needsBatchInterpretation = all_inputs_1d;
                    break;
                } else if (ngraph::is_type<ngraph::op::v1::Gather>(outOp) ||
                           ngraph::is_type<ngraph::op::v1::Split>(outOp) ||
                           ngraph::is_type<ngraph::op::v1::VariadicSplit>(outOp)) {
                    needsBatchInterpretation = true;
                    break;
                }
            }
        }
    }

    if (needsBatchInterpretation) {
        constTensor.batch[0] = constTensor.count();
        constTensor.feature[0] = 1;
    }

    auto constFormat = DefaultFormatForDims(op->get_output_shape(0).size());
    auto prop = getConstProperties(op);
    if (prop.isWeights) {
        // Deconvolution has reversed channels order (io instead of oi)
        if (prop.reversedChannelsOrder) {
            if (prop.hasGroupDimension) {
                switch (op->get_output_shape(0).size()) {
                    case 5: constFormat = cldnn::format::gioyx; break;
                    case 6: constFormat = cldnn::format::giozyx; break;
                }
            } else {
                switch (op->get_output_shape(0).size()) {
                    case 4: constFormat = cldnn::format::ioyx; break;
                    case 5: constFormat = cldnn::format::iozyx; break;
                }
            }
        } else {
            if (prop.hasGroupDimension) {
                switch (op->get_output_shape(0).size()) {
                    case 5: constFormat = cldnn::format::goiyx; break;
                    case 6: constFormat = cldnn::format::goizyx; break;
                }
            } else {
                switch (op->get_output_shape(0).size()) {
                    case 4: constFormat = cldnn::format::oiyx; break;
                    case 5: constFormat = cldnn::format::oizyx; break;
                }
            }
        }
        std::vector<cldnn::tensor::value_type> dims(constDims.begin(), constDims.end());
        for (size_t i = dims.size(); i < 4; i++) {
            dims.push_back(1);
        }
        constTensor = cldnn::tensor(constFormat, dims);
    }

    // If constDims has a dimension = 0, then create tensor with single value
    // TODO: check if dim=0 is a valid case
    // The product is accumulated in size_t: an int seed would make std::accumulate compute the
    // whole product in int and truncate it for large shapes.
    const bool hasZeroDim =
        std::accumulate(constDims.begin(), constDims.end(), static_cast<size_t>(1), std::multiplies<size_t>()) == 0;
    if (hasZeroDim)
        constTensor = cldnn::tensor{1};

    cldnn::layout constLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)),
                                              constFormat,
                                              constTensor);

    cldnn::primitive_id initialconstPrimID = layer_type_name_ID(op);
    cldnn::primitive_id constPrimID;
    auto data = op->get_data_ptr<char>();

    auto bufIter = p.blobMemCache.find(data);

    if (bufIter != p.blobMemCache.end()) {
        constPrimID = bufIter->second;
    } else {
        auto mem = cldnn::memory::allocate(p.GetEngine(), constLayout, 0, false);
        auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
        auto buf = tmpPointer.data();
        auto bufSize = constLayout.bytes_count();

        if (hasZeroDim) {
            // The source holds no elements, so there is nothing to copy from it:
            // give the single placeholder element a defined (zero) value instead.
            std::memset(&buf[0], 0, bufSize);
        } else {
            std::memcpy(&buf[0], &data[0], bufSize);
        }
        p.AddPrimitive(cldnn::data(initialconstPrimID, mem));
        p.blobMemCache[data] = initialconstPrimID;
        constPrimID = initialconstPrimID;
    }

    p.AddPrimitiveToProfiler(op, constPrimID);
}
REGISTER_FACTORY_IMPL(v0, Constant);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,44 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convert_like.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Lowers an nGraph v1::ConvertLike into a cldnn::reorder that keeps the format (format::any)
// and converts the first input to the element type of the second input.
void CreateConvertLikeOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvertLike>& op) {
    p.ValidateInputs(op, {2});
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    // The target type is taken from the "like" input (input #1).
    const auto targetType = DataTypeFromPrecision(op->get_input_element_type(1));

    p.AddPrimitive(cldnn::reorder(layerName, inputIds[0], cldnn::format::any, targetType));
    p.AddPrimitiveToProfiler(op);
}
// Lowers an nGraph v0::Convert into a cldnn::reorder that keeps the format (format::any)
// and converts the input to the operation's destination type.
void CreateConvertOp(Program& p, const std::shared_ptr<ngraph::op::v0::Convert>& op) {
    p.ValidateInputs(op, {1});
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    const auto targetType = DataTypeFromPrecision(op->get_destination_type());

    p.AddPrimitive(cldnn::reorder(layerName, inputIds[0], cldnn::format::any, targetType));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, Convert);
REGISTER_FACTORY_IMPL(v1, ConvertLike);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,326 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/binary_convolution.hpp"
#include "ngraph/op/deformable_convolution.hpp"
#include "ngraph/op/group_conv.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/op/util/op_types.hpp"
#include "api/convolution.hpp"
#include "api/deconvolution.hpp"
#include "api/binary_convolution.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Convolution attributes converted to clDNN form; produced by GetConvolutionParameters(),
// which fills the members positionally (keep the member order).
struct ConvoltuionParameters {
// Stride per spatial dimension (batch and feature set to 1).
cldnn::tensor stride;
// Negated pads_begin per spatial dimension (batch and feature set to 0).
cldnn::tensor padding;
// Dilation per spatial dimension (batch and feature set to 1).
cldnn::tensor dilation;
// Number of convolution groups.
uint32_t groups;
};
// Converts nGraph convolution attributes (given in IE order, outermost spatial dimension first)
// into clDNN tensors (x first). Missing spatial dimensions are filled with the neutral value
// (1 for stride/dilation, 0 for padding); padding is the negated pads_begin.
// Throws when the three attribute vectors differ in length or describe anything but 1, 2 or 3
// spatial dimensions.
static ConvoltuionParameters GetConvolutionParameters(const ngraph::CoordinateDiff& pads_begin,
                                                      const ngraph::Strides& dilations,
                                                      const ngraph::Strides& strides,
                                                      uint32_t groups) {
    const auto spatialRank = strides.size();
    if (pads_begin.size() != spatialRank || dilations.size() != spatialRank)
        THROW_IE_EXCEPTION << "Strides, Dilations and Pads are supposed to have the same elements count";

    cldnn::tensor strideTensor, paddingTensor, dilationTensor;
    if (spatialRank == 3) {
        strideTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
        paddingTensor = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[2], -pads_begin[1], -pads_begin[0]));
        dilationTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[2], dilations[1], dilations[0]));
    } else if (spatialRank == 2) {
        strideTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
        paddingTensor = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[1], -pads_begin[0], 0));
        dilationTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[1], dilations[0], 1));
    } else if (spatialRank == 1) {
        strideTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
        paddingTensor = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-pads_begin[0], 0, 0));
        dilationTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(dilations[0], 1, 1));
    } else {
        THROW_IE_EXCEPTION << "Unsupported convolve parameters size. Only 1d, 2d, and 3d cases are supported";
    }

    return {strideTensor, paddingTensor, dilationTensor, groups};
}
// Lowers an nGraph v1::GroupConvolution into a cldnn::convolution. The number of groups is the
// first dimension of the weights shape. When the weights are not produced directly by a
// Constant node, they are reshaped (group and output-channel dimensions merged) and reordered
// to the default format before being used.
void CreateGroupConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::GroupConvolution>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    uint32_t groups = op->get_input_shape(1)[0];
    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), groups);
    auto outDims = op->get_output_shape(0);
    auto outPrecision = op->get_output_element_type(0);

    auto weightsName = inputs[1];

    // WA: weights behind e.g. a FakeQuantize that was not folded by constant propagation keep
    // the group dimension in their shape, while the format selected by default has none.
    const bool weightsAreConstant =
        std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) != nullptr;
    if (!weightsAreConstant) {
        std::string reshapeName = layerName + "_cldnn_weights_reshape";
        std::string reorderName = layerName + "_cldnn_weights_reorder";

        auto weights_shape = op->get_input_shape(1);
        // Drop the leading G dimension and fold it into O: [G, O, I, ...] -> [G * O, I, ...]
        std::vector<size_t> mergedShape(weights_shape.begin() + 1, weights_shape.end());
        mergedShape[0] = weights_shape[0] * weights_shape[1];

        p.AddPrimitive(cldnn::reshape(reshapeName,
                                      weightsName,
                                      CldnnTensorFromIEDims(mergedShape)));
        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);

        p.AddPrimitive(cldnn::reorder(reorderName,
                                      reshapeName,
                                      DefaultFormatForDims(mergedShape.size()),
                                      DataTypeFromPrecision(op->get_input_element_type(1))));
        p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);

        weightsName = reorderName;
    }

    std::vector<cldnn::primitive_id> weights = {weightsName};
    cldnn::convolution convPrim(layerName,
                                inputs[0],
                                weights,
                                {},
                                params.groups,
                                params.stride,
                                params.padding,
                                params.dilation,
                                CldnnTensorFromIEDims(outDims),
                                DataTypeFromPrecision(outPrecision));

    p.AddPrimitive(convPrim);
    p.AddPrimitiveToProfiler(op);
}
// Lowers an nGraph v1::Convolution into a single-group, bias-less cldnn::convolution whose
// output size and data type follow the nGraph output.
void CreateConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::Convolution>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
    const auto outputSize = CldnnTensorFromIEDims(op->get_output_shape(0));
    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));

    std::vector<cldnn::primitive_id> weights = {inputs[1]};
    cldnn::convolution convPrim(layerName,
                                inputs[0],
                                weights,
                                {},
                                params.groups,
                                params.stride,
                                params.padding,
                                params.dilation,
                                outputSize,
                                outputType);

    p.AddPrimitive(convPrim);
    p.AddPrimitiveToProfiler(op);
}
// Lowers an nGraph v1::ConvolutionBackpropData into a single-group cldnn::deconvolution.
// Only unit dilations are supported (throws otherwise). When IsNodeOnConstPath() holds for the
// weights input, a reshape with the first two weight dimensions swapped is inserted in front
// of the deconvolution.
void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvolutionBackpropData>& op) {
    // 3rd input is an optional output shape
    p.ValidateInputs(op, {2, 3});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    for (auto dilation : op->get_dilations()) {
        if (dilation != 1) {
            THROW_IE_EXCEPTION << "Unsupported dilation in ConvolutionBackpropData " << op->get_friendly_name();
        }
    }

    auto weightsName = inputs[1];
    // WA: For the case with FakeQuantize op on weights that are not folded by constant propagation pass for some reason.
    // Dimensions order of weights blob is IOYX, but
    // the selected format is OIYX by default. So we need to swap I and O dimensions to match the format
    if (IsNodeOnConstPath(op->get_input_node_shared_ptr(1))) {
        std::string reshapeName = layerName + "_cldnn_weights_reshape";

        auto swappedShape = op->get_input_shape(1);
        std::swap(swappedShape[0], swappedShape[1]);

        p.AddPrimitive(cldnn::reshape(reshapeName,
                                      weightsName,
                                      CldnnTensorFromIEDims(swappedShape)));
        p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);

        weightsName = reshapeName;
    }

    std::vector<cldnn::primitive_id> weights = {weightsName};

    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
    cldnn::deconvolution deconvPrim(layerName,
                                    inputs[0],
                                    weights,
                                    {},
                                    params.groups,
                                    params.stride,
                                    params.padding,
                                    CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));

    p.AddPrimitive(deconvPrim);
    p.AddPrimitiveToProfiler(op);
}
// Lowers an nGraph v1::GroupConvolutionBackpropData into a grouped cldnn::deconvolution.
// The number of groups is the first dimension of the weights shape.
// Only unit dilations are supported (throws otherwise).
void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::GroupConvolutionBackpropData>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto dilations = op->get_dilations();
    for (auto d : dilations) {
        if (d != 1) {
            // Name the operation actually being translated (the message used to say
            // "ConvolutionBackpropData").
            THROW_IE_EXCEPTION << "Unsupported dilation in GroupConvolutionBackpropData " << op->get_friendly_name();
        }
    }

    uint32_t groups = op->get_input_shape(1)[0];
    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), groups);
    std::vector<cldnn::primitive_id> weights = {inputs[1]};

    auto deconvPrim = cldnn::deconvolution(layerName,
                                           inputs[0],
                                           weights,
                                           {},
                                           params.groups,
                                           params.stride,
                                           params.padding,
                                           CldnnTensorFromIEDims(op->get_output_tensor(0).get_shape()));

    p.AddPrimitive(deconvPrim);
    p.AddPrimitiveToProfiler(op);
}
// Lowers an nGraph v1::DeformableConvolution.
//  * groups > 1: a single cldnn::convolution taking data, offsets and weights.
//  * otherwise:  a cldnn::deformable_interp (named "<layer>_interp") followed by a
//                cldnn::deformable_conv carrying the layer name.
// In the second case the kernel size is read from the spatial dimensions of the weights shape;
// weights with anything other than 2 or 3 spatial dimensions are rejected with an exception.
void CreateDeformableConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::DeformableConvolution>& op) {
    p.ValidateInputs(op, {3});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), op->get_group());
    auto outDims = op->get_output_shape(0);

    std::vector<cldnn::primitive_id> weights = {inputs[2]};
    if (params.groups > 1) {
        auto convPrim = cldnn::convolution(layerName,
                                           inputs[0],
                                           inputs[1],
                                           weights,
                                           {},
                                           params.groups,
                                           op->get_deformable_group(),
                                           params.stride,
                                           params.padding,
                                           params.dilation,
                                           CldnnTensorFromIEDims(outDims));

        p.AddPrimitive(convPrim);
        p.AddPrimitiveToProfiler(op);
    } else {
        std::string defConvLayerNameInterp = layerName + "_interp";
        std::string defConvLayerNameConv = layerName;

        cldnn::tensor kernel;
        auto weights_shape = op->get_input_shape(2);
        // Index of the first spatial dimension. This branch only runs without groups, so the
        // weights have no leading group dimension and the spatial part starts at index 2.
        const size_t sidx = 2;
        // The kernel rank is the number of *spatial* dimensions. The previous check compared the
        // full weights rank against 3 and then indexed up to [sidx + 2], past the end of the shape.
        if (weights_shape.size() == sidx + 3) {
            kernel = cldnn::tensor(cldnn::batch(1),
                                   cldnn::feature(1),
                                   cldnn::spatial(weights_shape[sidx + 2],
                                                  weights_shape[sidx + 1],
                                                  weights_shape[sidx + 0]));
        } else if (weights_shape.size() == sidx + 2) {
            kernel = cldnn::tensor(cldnn::batch(1),
                                   cldnn::feature(1),
                                   cldnn::spatial(weights_shape[sidx + 1],
                                                  weights_shape[sidx + 0],
                                                  1));
        } else {
            THROW_IE_EXCEPTION << "Unsupported weights rank (" << weights_shape.size() << ") in DeformableConvolution "
                               << op->get_friendly_name();
        }

        auto defConvPrimInterp = cldnn::deformable_interp(defConvLayerNameInterp,
                                                          inputs[0],
                                                          inputs[1],
                                                          params.groups,
                                                          op->get_deformable_group(),
                                                          params.stride,
                                                          params.padding,
                                                          params.dilation,
                                                          CldnnTensorFromIEDims(outDims),
                                                          kernel);
        p.AddPrimitive(defConvPrimInterp);
        p.AddInnerPrimitiveToProfiler(defConvLayerNameInterp, defConvLayerNameConv, op);
        auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv,
                                                  defConvLayerNameInterp,
                                                  weights,
                                                  {},
                                                  params.groups,
                                                  CldnnTensorFromIEDims(outDims));
        p.AddPrimitive(defConvPrim);
        p.AddPrimitiveToProfiler(defConvLayerNameConv, op);
    }
}
// Lowers an nGraph v1::BinaryConvolution into a single-group cldnn::binary_convolution.
// The calculation precision passed to clDNN is the data type of the nGraph output.
void CreateBinaryConvolutionOp(Program& p, const std::shared_ptr<ngraph::op::v1::BinaryConvolution>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto params = GetConvolutionParameters(op->get_pads_begin(), op->get_dilations(), op->get_strides(), 1);
    const auto outputSize = CldnnTensorFromIEDims(op->get_output_shape(0));
    auto outPrecision = op->get_output_element_type(0);
    cldnn::data_types calc_precision = DataTypeFromPrecision(outPrecision);

    std::vector<cldnn::primitive_id> weights = {inputs[1]};
    cldnn::binary_convolution convPrim(layerName,
                                       inputs[0],
                                       weights,
                                       params.stride,
                                       params.padding,
                                       params.dilation,
                                       outputSize,
                                       params.groups,
                                       op->get_pad_value(),
                                       calc_precision);

    p.AddPrimitive(convPrim);
    p.AddPrimitiveToProfiler(op);
}
// Factory registrations for the convolution-family operations of this file.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this chunk; presumably it binds each
// Create<Name>Op function to the ngraph op of the given opset version - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v1, GroupConvolution);
REGISTER_FACTORY_IMPL(v1, Convolution);
REGISTER_FACTORY_IMPL(v1, ConvolutionBackpropData);
REGISTER_FACTORY_IMPL(v1, GroupConvolutionBackpropData);
REGISTER_FACTORY_IMPL(v1, DeformableConvolution);
REGISTER_FACTORY_IMPL(v1, BinaryConvolution);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/ctc_greedy_decoder.hpp"
#include "api/ctc_greedy_decoder.hpp"
namespace CLDNNPlugin {
// Builds a clDNN ctc_greedy_decoder primitive from an ngraph v0::CTCGreedyDecoder node.
// The node must have exactly two inputs; both are forwarded to the primitive in order.
void CreateCTCGreedyDecoderOp(Program& p, const std::shared_ptr<ngraph::op::v0::CTCGreedyDecoder>& op) {
    p.ValidateInputs(op, {2});

    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primitiveName = layer_type_name_ID(op);

    // Output type and dimensions are taken from the node's first output.
    const bool mergeRepeated = op->get_ctc_merge_repeated();
    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
    const auto outputTensor = CldnnTensorFromIEDims(op->get_output_shape(0));

    p.AddPrimitive(cldnn::ctc_greedy_decoder(primitiveName,
                                             inputIds[0],
                                             inputIds[1],
                                             mergeRepeated,
                                             outputType,
                                             outputTensor));
    p.AddPrimitiveToProfiler(op);
}
// NOTE(review): macro defined outside this chunk; presumably registers CreateCTCGreedyDecoderOp
// as the factory for ngraph::op::v0::CTCGreedyDecoder - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v0, CTCGreedyDecoder);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,74 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/cum_sum.hpp"
#include "ngraph/op/constant.hpp"
#include "api/cum_sum.hpp"
namespace CLDNNPlugin {
// Maps an IE/ngraph CumSum axis onto the clDNN cum_sum_axis enumeration.
//
// axis - axis in IE dimension order; negative values count from the end.
// rank - number of dimensions of the input tensor.
//
// Throws if the (normalized) axis is outside [0, rank) or has no clDNN counterpart.
static inline cldnn::cum_sum::cum_sum_axis GetCumSumAxis(int32_t axis, uint32_t rank) {
    // All arithmetic is done in a signed 64-bit type so that a negative axis and an
    // unsigned rank are never mixed in one expression (no wrap-around, no sign-compare).
    const int64_t signedRank = static_cast<int64_t>(rank);
    int64_t normalizedAxis = axis;
    if (normalizedAxis < 0)
        normalizedAxis += signedRank;
    if (normalizedAxis < 0 || normalizedAxis >= signedRank)
        THROW_IE_EXCEPTION << "CumSum axis is not correspond to number of dimensions";

    // Difference in dimension ordering between IE and clDNN,
    // reverse spatial dimensions after batch and feature.
    int64_t cldnn_axis = normalizedAxis;
    if (normalizedAxis >= 2) {
        const int64_t spatial_axis = normalizedAxis - 2;
        // Default and minimum number of dimensions is 4
        const int64_t spatial_size = (signedRank > 4 ? signedRank : 4) - 2;
        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
    }

    switch (cldnn_axis) {
        case 0: return cldnn::cum_sum::cum_sum_axis::along_b;
        case 1: return cldnn::cum_sum::cum_sum_axis::along_f;
        case 2: return cldnn::cum_sum::cum_sum_axis::along_x;
        case 3: return cldnn::cum_sum::cum_sum_axis::along_y;
        case 4: return cldnn::cum_sum::cum_sum_axis::along_z;
        case 5: return cldnn::cum_sum::cum_sum_axis::along_w;
        default: THROW_IE_EXCEPTION << "Unsupported CumSum axis: " << normalizedAxis;
    }

    return cldnn::cum_sum::cum_sum_axis::along_f;  // shouldn't get here
}
// Translates an ngraph v0::CumSum node into a clDNN cum_sum primitive.
//
// The node may have one input (data; axis defaults to 0) or two inputs (data, axis).
// When present, the axis input must be a Constant; its first element is used.
// Throws if the axis input is not a Constant or holds no elements.
void CreateCumSumOp(Program& p, const std::shared_ptr<ngraph::op::v0::CumSum>& op) {
    p.ValidateInputs(op, {1, 2});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto exclusive = op->is_exclusive();
    auto reverse = op->is_reverse();

    size_t rank = op->get_input_shape(0).size();
    int32_t axis = 0;
    if (op->get_input_size() == 2) {
        auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
        if (!axes_constant) {
            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
        // Guard against an empty constant before reading its first element.
        const auto axes = axes_constant->cast_vector<int32_t>();
        if (axes.empty()) {
            THROW_IE_EXCEPTION << "Empty axis constant in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
        axis = axes[0];
    }

    auto primitive = cldnn::cum_sum(layerName,
                                    inputPrimitives[0],
                                    GetCumSumAxis(axis, static_cast<uint32_t>(rank)),
                                    exclusive,
                                    reverse);

    p.AddPrimitive(primitive);
    p.AddPrimitiveToProfiler(op);
}
// NOTE(review): macro defined outside this chunk; presumably registers CreateCumSumOp
// as the factory for ngraph::op::v0::CumSum - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v0, CumSum);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,251 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "simple_math.h"
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/node.hpp"
#include "api/custom_gpu_primitive.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Joins the elements of `vec` into a comma-separated string (no spaces, no trailing comma).
// Elements are rendered with std::to_string. An empty vector yields an empty string.
// The vector is taken by const reference so that callers do not pay for a copy.
template<typename T>
static inline std::string vecToString(const std::vector<T>& vec) {
    if (vec.empty())
        return "";

    std::string res = std::to_string(vec[0]);
    for (size_t i = 1; i < vec.size(); i++) {
        res += "," + std::to_string(vec[i]);
    }
    return res;
}

// Specialization for strings: elements are joined verbatim, without std::to_string.
template<>
inline std::string vecToString<std::string>(const std::vector<std::string>& vec) {
    if (vec.empty())
        return "";

    std::string res = vec[0];
    for (size_t i = 1; i < vec.size(); i++) {
        res += "," + vec[i];
    }
    return res;
}
class CustomLayerAttributeVisitor : public ngraph::AttributeVisitor {
public:
CustomLayerAttributeVisitor() : m_values({}) { }
void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override {
THROW_IE_EXCEPTION << "Attribute " << name << " can't be processed\n";
}
// The remaining adapter methods fall back on the void adapter if not implemented
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& adapter) override {
m_values[name] = adapter.get();
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& adapter) override {
m_values[name] = std::to_string(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override {
m_values[name] = std::to_string(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<double>& adapter) override {
m_values[name] = std::to_string(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<std::string>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<float>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<double>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int8_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int16_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int32_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint8_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint16_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint32_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
void on_adapter(const std::string& name, ngraph::ValueAccessor<std::vector<uint64_t>>& adapter) override {
m_values[name] = vecToString(adapter.get());
}
std::map<std::string, std::string> get_parameters() const {
return m_values;
}
protected:
std::map<std::string, std::string> m_values;
};
// Creates a clDNN custom_gpu_primitive for `op` as described by `customLayer`.
//
// Steps performed:
//   1. Collect the node attributes as strings and turn the layer's define rules into
//      "#define ..." lines (attribute value if present, otherwise the rule's default value).
//   2. Build the kernel argument list; inputs requesting a specific format get a reorder
//      primitive in front of the custom primitive.
//   3. Derive the output layout from the node's first output (up to 4 dimensions are used).
//   4. Evaluate the global/local work size rules with the variables b/f/y/x (either case).
//   5. Add the custom primitive and, if an output format was requested, a reorder back to the
//      default format; p.primitiveIDs maps the layer name to the last primitive in that chain.
//
// Throws on an unknown kernel parameter type or an out-of-range input dimension source index.
void CreateCustomOp(Program& p, const std::shared_ptr<ngraph::Node>& op, CLDNNCustomLayerPtr customLayer) {
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    const std::string genericLayerName = layer_type_name_ID(op);

    CustomLayerAttributeVisitor visitor;
    op->visit_attributes(visitor);
    auto params = visitor.get_parameters();

    // Handle defines
    std::string layerDefines;
    for (const auto& def : customLayer->Defines()) {
        std::string singleDefine("#define " + def.name + " " + def.prefix);
        if (params.find(def.param) != params.end()) {
            singleDefine += params.at(def.param);
        } else {
            singleDefine += def.default_value;
        }
        singleDefine += def.postfix + "\n";
        layerDefines.append(singleDefine);
    }

    // One slot per node input; a slot stays empty until a kernel parameter refers to that input.
    std::vector<cldnn::primitive_id> reorderedInputs;
    reorderedInputs.resize(inputPrimitives.size());

    // Handle kernel parameters
    std::vector<cldnn::custom_gpu_primitive::arg_desc> kernelParameters;
    cldnn::format outputFormat(cldnn::format::any);
    for (const auto& param : customLayer->KernelParams()) {
        switch (param.type) {
        case CLDNNCustomLayer::ParamType::Input: {
            // Grow the argument list so that slot `paramIndex` exists.
            const size_t requiredSize = size_t(param.paramIndex + 1);
            if (kernelParameters.size() < requiredSize)
                kernelParameters.resize(requiredSize);
            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_input;
            kernelParameters[param.paramIndex].index =
                static_cast<cldnn::custom_gpu_primitive::arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);

            // Handle input reorder
            if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) {
                // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another)
                if (param.format != cldnn::format::any) {
                    auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + op->get_friendly_name() + Program::m_preCustomLayerTag;
                    auto preprocessPrim = cldnn::reorder(
                        reorderPrimName,
                        inputPrimitives[param.portIndex],
                        param.format,
                        DataTypeFromPrecision(op->get_input_element_type(param.portIndex)));

                    p.AddPrimitive(preprocessPrim);
                    p.AddInnerPrimitiveToProfiler(reorderPrimName, genericLayerName, op);
                    reorderedInputs[param.portIndex] = (reorderPrimName);
                } else {
                    reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
                }
            }
            break;
        }
        case CLDNNCustomLayer::ParamType::Output: {
            const size_t requiredSize = size_t(param.paramIndex + 1);
            if (kernelParameters.size() < requiredSize)
                kernelParameters.resize(requiredSize);
            kernelParameters[param.paramIndex].type = cldnn::custom_gpu_primitive::arg_output;
            kernelParameters[param.paramIndex].index =
                static_cast<cldnn::custom_gpu_primitive::arg_index>((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex);
            outputFormat = param.format;
            break;
        }
        default:
            THROW_IE_EXCEPTION << "Invalid custom layer param type: " << param.type << " in operation: " << op->get_friendly_name();
        }
    }
    const std::string layerTitle("\n// Layer " + op->get_friendly_name() + " using Custom Layer " + customLayer->Name() + "\n");
    const std::string defineTitle("// Custom Layer User Defines\n");

    // Missing leading dimensions of the output shape are treated as 1.
    auto dims = op->get_output_shape(0);
    size_t N = (dims.size() > 0) ? dims[0] : 1;
    size_t C = (dims.size() > 1) ? dims[1] : 1;
    size_t H = (dims.size() > 2) ? dims[2] : 1;
    size_t W = (dims.size() > 3) ? dims[3] : 1;
    cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H));

    cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(op->get_output_element_type(0)), outputFormat, outputTensor);

    // evaluate work sizes rules
    std::vector<size_t> gws, lws;

    // assume output tensor is dimension source by default
    int batchDim = outputTensor.batch[0];
    int featureDim = outputTensor.feature[0];
    int yDim = outputTensor.spatial[1];
    int xDim = outputTensor.spatial[0];
    int iidx = customLayer->InputDimSourceIndex();

    // if input index is greater than -1, take dimension from input
    if (iidx >= 0) {
        if (static_cast<size_t>(iidx) >= op->get_input_size())
            THROW_IE_EXCEPTION << "Invalid input tensor for index: " << iidx;
        auto inputDims = op->get_input_shape(iidx);
        const size_t inputRank = inputDims.size();

        // Dimensions are read from the back of the input shape. The output-rank conditions are
        // kept as before; the additional input-rank conditions prevent `inputRank - k` from
        // underflowing (and indexing out of bounds) when the input has fewer dimensions.
        xDim = inputRank > 0 ? inputDims[inputRank - 1] : 0;
        yDim = (dims.size() > 1 && inputRank > 1) ? inputDims[inputRank - 2] : 0;
        featureDim = (dims.size() > 2 && inputRank > 2) ? inputDims[inputRank - 3] : 0;
        batchDim = (dims.size() > 3 && inputRank > 3) ? inputDims[inputRank - 4] : 0;
    }
    const std::map<char, int> vars = {
        { 'b', batchDim }  , { 'B', batchDim },
        { 'f', featureDim }, { 'F', featureDim },
        { 'y', yDim },       { 'Y', yDim },
        { 'x', xDim },       { 'X', xDim },
    };
    for (const auto& rule : customLayer->GlobalSizeRules()) {
        SimpleMathExpression expr;
        expr.SetVariables(vars);
        expr.SetExpression(rule);
        gws.push_back(expr.Evaluate());
    }
    for (const auto& rule : customLayer->LocalSizeRules()) {
        SimpleMathExpression expr;
        expr.SetVariables(vars);
        expr.SetExpression(rule);
        lws.push_back(expr.Evaluate());
    }

    auto customPrim = cldnn::custom_gpu_primitive(genericLayerName,
                                                  reorderedInputs,
                                                  { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
                                                  customLayer->KernelEntry(),
                                                  kernelParameters,
                                                  customLayer->CompilerOptions(),
                                                  outputLayout,
                                                  gws,
                                                  lws);

    auto prevLayerName = genericLayerName;
    if (outputLayout.format != cldnn::format::any) {
        // Handle output reorder
        auto reorderPrimName = genericLayerName + Program::m_postCustomLayerTag;
        p.AddPrimitive(
            cldnn::reorder(reorderPrimName,
                           genericLayerName,
                           DefaultFormatForDims(op->get_output_shape(0).size()),
                           customPrim.output_layout.data_type));
        prevLayerName = reorderPrimName;
        p.AddInnerPrimitiveToProfiler(reorderPrimName, genericLayerName, op);
    }
    p.AddPrimitive(customPrim);
    p.AddPrimitiveToProfiler(genericLayerName, op);
    // Consumers of this layer are pointed at the last primitive of the chain.
    p.primitiveIDs[genericLayerName] = prevLayerName;
}
} // namespace CLDNNPlugin

View File

@ -0,0 +1,44 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/depth_to_space.hpp"
#include "api/depth_to_space.hpp"
namespace CLDNNPlugin {
// Converts the ngraph DepthToSpace mode into the matching clDNN depth_to_space_mode.
// Throws for any value other than BLOCKS_FIRST / DEPTH_FIRST.
static cldnn::depth_to_space_mode GetDepthMode(ngraph::op::v0::DepthToSpace::DepthToSpaceMode mode) {
    using NgraphMode = ngraph::op::v0::DepthToSpace::DepthToSpaceMode;

    if (mode == NgraphMode::BLOCKS_FIRST)
        return cldnn::depth_to_space_mode::blocks_first;
    if (mode == NgraphMode::DEPTH_FIRST)
        return cldnn::depth_to_space_mode::depth_first;

    THROW_IE_EXCEPTION << "Unsupported DepthToSpaceMode value: " << static_cast<int>(mode);
    return cldnn::depth_to_space_mode::blocks_first;
}
// Builds a clDNN depth_to_space primitive from an ngraph v0::DepthToSpace node.
// The node must have exactly one input; block size and mode are read from the node.
void CreateDepthToSpaceOp(Program& p, const std::shared_ptr<ngraph::op::v0::DepthToSpace>& op) {
    p.ValidateInputs(op, {1});

    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primitiveName = layer_type_name_ID(op);

    const size_t block = op->get_block_size();
    const cldnn::depth_to_space_mode cldnnMode = GetDepthMode(op->get_mode());

    p.AddPrimitive(cldnn::depth_to_space(primitiveName, inputIds[0], block, cldnnMode));
    p.AddPrimitiveToProfiler(op);
}
// NOTE(review): macro defined outside this chunk; presumably registers CreateDepthToSpaceOp
// as the factory for ngraph::op::v0::DepthToSpace - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v0, DepthToSpace);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,86 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/detection_output.hpp"
#include "api/detection_output.hpp"
namespace CLDNNPlugin {
// Looks up the clDNN prior-box code type for a Caffe-style code type name
// (e.g. "caffe.PriorBoxParameter.CORNER"). Throws when the name is not known.
static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str) {
    static const std::map<std::string, cldnn::prior_box_code_type> knownCodeTypes = {
        { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner },
        { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size },
        { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size },
    };

    const auto found = knownCodeTypes.find(str);
    if (found == knownCodeTypes.end()) {
        THROW_IE_EXCEPTION << "Unknown Prior-Box code type: " << str;
    }
    return found->second;
}
// Translates an ngraph v0::DetectionOutput node into a clDNN detection_output primitive.
//
// The node must have exactly three inputs, which are forwarded in order.
// All primitive parameters come from the node attributes, except `eta`, which is fixed to 1.0.
// Throws if the `keep_top_k` attribute is empty or the code type name is unknown.
void CreateDetectionOutputOp(Program& p, const std::shared_ptr<ngraph::op::v0::DetectionOutput>& op) {
    p.ValidateInputs(op, {3});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto attrs = op->get_attrs();

    // Only the first keep_top_k value is used; make sure it exists before indexing.
    if (attrs.keep_top_k.empty()) {
        THROW_IE_EXCEPTION << "Empty keep_top_k attribute in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    uint32_t num_classes            = attrs.num_classes;
    bool share_location             = attrs.share_location;
    int background_label_id         = attrs.background_label_id;
    float nms_threshold             = attrs.nms_threshold;
    int top_k                       = attrs.top_k;
    float confidence_threshold      = attrs.confidence_threshold;
    float eta                       = 1.0f;
    int keep_top_k                  = attrs.keep_top_k[0];
    bool variance_encoded_in_target = attrs.variance_encoded_in_target;
    int input_width                 = attrs.input_width;
    int input_height                = attrs.input_height;
    bool normalized                 = attrs.normalized;
    std::string code_type           = attrs.code_type;
    bool clip_before_nms            = attrs.clip_before_nms;
    bool clip_after_nms             = attrs.clip_after_nms;
    bool decrease_label_id          = attrs.decrease_label_id;

    cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);
    // Normalized priors: info size 4, coordinate offset 0; otherwise info size 5, offset 1.
    int32_t prior_info_size = normalized ? 4 : 5;
    int32_t prior_coordinates_offset = normalized ? 0 : 1;

    auto detectionPrim = cldnn::detection_output(layerName,
                                                 inputPrimitives[0],
                                                 inputPrimitives[1],
                                                 inputPrimitives[2],
                                                 num_classes,
                                                 keep_top_k,
                                                 share_location,
                                                 background_label_id,
                                                 nms_threshold,
                                                 top_k,
                                                 eta,
                                                 cldnnCodeType,
                                                 variance_encoded_in_target,
                                                 confidence_threshold,
                                                 prior_info_size,
                                                 prior_coordinates_offset,
                                                 normalized,
                                                 input_width,
                                                 input_height,
                                                 decrease_label_id,
                                                 clip_before_nms,
                                                 clip_after_nms);

    p.AddPrimitive(detectionPrim);
    p.AddPrimitiveToProfiler(op);
}
// NOTE(review): macro defined outside this chunk; presumably registers CreateDetectionOutputOp
// as the factory for ngraph::op::v0::DetectionOutput - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v0, DetectionOutput);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,190 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/squared_difference.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/and.hpp"
#include "ngraph/op/or.hpp"
#include "ngraph/op/xor.hpp"
#include "ngraph/op/power.hpp"
#include "ngraph/op/floor_mod.hpp"
#include "api/activation.hpp"
#include "api/eltwise.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
namespace CLDNNPlugin {
// Creates a clDNN eltwise primitive with the given mode for `op`.
//
// Every input whose rank differs from the output rank is first brought to the output rank:
// a reorder is inserted when the default format for the two ranks differs, followed by a
// reshape whose target shape is the input shape with leading ones prepended.
void CreateElementwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::eltwise_mode mode) {
    auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primitiveName = layer_type_name_ID(op);
    const auto outputRank = op->get_output_shape(0).size();

    for (size_t port = 0; port < inputIds.size(); ++port) {
        auto portShape = op->get_input_shape(port);
        const auto portRank = portShape.size();
        if (portRank == outputRank)
            continue;  // nothing to align for this input

        const std::string helperPrefix = primitiveName + "_cldnn_in" + std::to_string(port);

        // Add reorder if changing number of dimensions requires changing format
        const auto wantedFormat = DefaultFormatForDims(outputRank);
        if (wantedFormat.value != DefaultFormatForDims(portRank).value) {
            const auto reorderId = helperPrefix + "_reorder";
            const auto portDataType = DataTypeFromPrecision(op->get_input_element_type(port));
            p.AddPrimitive(cldnn::reorder(reorderId, inputIds[port], wantedFormat, portDataType));
            p.AddInnerPrimitiveToProfiler(reorderId, primitiveName, op);
            inputIds[port] = reorderId;
        }

        // Extend input dimensions by prepending ones
        const auto reshapeId = helperPrefix + "_reshape";
        portShape.insert(portShape.begin(), outputRank - portRank, 1ul);
        const auto alignedTensor = CldnnTensorFromIEDims(portShape);
        p.AddPrimitive(cldnn::reshape(reshapeId, inputIds[port], alignedTensor));
        p.AddInnerPrimitiveToProfiler(reshapeId, primitiveName, op);
        inputIds[port] = reshapeId;
    }

    const auto outputDataType = DataTypeFromPrecision(op->get_output_element_type(0));
    p.AddPrimitive(cldnn::eltwise(primitiveName, inputIds, mode, {}, outputDataType));
    p.AddPrimitiveToProfiler(op);
}
// Arithmetic element-wise operations. Each creator forwards to CreateElementwiseOp
// with the clDNN eltwise mode that corresponds to the ngraph operation.

// v1::Add -> eltwise sum
void CreateAddOp(Program& p, const std::shared_ptr<ngraph::op::v1::Add>& op) {
    const auto mode = cldnn::eltwise_mode::sum;
    CreateElementwiseOp(p, op, mode);
}

// v1::Multiply -> eltwise prod
void CreateMultiplyOp(Program& p, const std::shared_ptr<ngraph::op::v1::Multiply>& op) {
    const auto mode = cldnn::eltwise_mode::prod;
    CreateElementwiseOp(p, op, mode);
}

// v1::Maximum -> eltwise max
void CreateMaximumOp(Program& p, const std::shared_ptr<ngraph::op::v1::Maximum>& op) {
    const auto mode = cldnn::eltwise_mode::max;
    CreateElementwiseOp(p, op, mode);
}

// v1::Minimum -> eltwise min
void CreateMinimumOp(Program& p, const std::shared_ptr<ngraph::op::v1::Minimum>& op) {
    const auto mode = cldnn::eltwise_mode::min;
    CreateElementwiseOp(p, op, mode);
}

// v1::Subtract -> eltwise sub
void CreateSubtractOp(Program& p, const std::shared_ptr<ngraph::op::v1::Subtract>& op) {
    const auto mode = cldnn::eltwise_mode::sub;
    CreateElementwiseOp(p, op, mode);
}

// v1::Divide -> eltwise div
void CreateDivideOp(Program& p, const std::shared_ptr<ngraph::op::v1::Divide>& op) {
    const auto mode = cldnn::eltwise_mode::div;
    CreateElementwiseOp(p, op, mode);
}

// v0::SquaredDifference -> eltwise squared_diff
void CreateSquaredDifferenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::SquaredDifference>& op) {
    const auto mode = cldnn::eltwise_mode::squared_diff;
    CreateElementwiseOp(p, op, mode);
}
// Comparison element-wise operations. Each creator forwards to CreateElementwiseOp
// with the clDNN eltwise mode that corresponds to the ngraph operation.

// v1::Equal -> eltwise eq
void CreateEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::Equal>& op) {
    const auto mode = cldnn::eltwise_mode::eq;
    CreateElementwiseOp(p, op, mode);
}

// v1::NotEqual -> eltwise ne
void CreateNotEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::NotEqual>& op) {
    const auto mode = cldnn::eltwise_mode::ne;
    CreateElementwiseOp(p, op, mode);
}

// v1::Less -> eltwise lt
void CreateLessOp(Program& p, const std::shared_ptr<ngraph::op::v1::Less>& op) {
    const auto mode = cldnn::eltwise_mode::lt;
    CreateElementwiseOp(p, op, mode);
}

// v1::LessEqual -> eltwise le
void CreateLessEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::LessEqual>& op) {
    const auto mode = cldnn::eltwise_mode::le;
    CreateElementwiseOp(p, op, mode);
}

// v1::Greater -> eltwise gt
void CreateGreaterOp(Program& p, const std::shared_ptr<ngraph::op::v1::Greater>& op) {
    const auto mode = cldnn::eltwise_mode::gt;
    CreateElementwiseOp(p, op, mode);
}

// v1::GreaterEqual -> eltwise ge
void CreateGreaterEqualOp(Program& p, const std::shared_ptr<ngraph::op::v1::GreaterEqual>& op) {
    const auto mode = cldnn::eltwise_mode::ge;
    CreateElementwiseOp(p, op, mode);
}
// Logical element-wise operations. Each creator forwards to CreateElementwiseOp
// with the clDNN eltwise mode that corresponds to the ngraph operation.

// v1::LogicalAnd -> eltwise logic_and
void CreateLogicalAndOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalAnd>& op) {
    const auto mode = cldnn::eltwise_mode::logic_and;
    CreateElementwiseOp(p, op, mode);
}

// v1::LogicalOr -> eltwise logic_or
void CreateLogicalOrOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalOr>& op) {
    const auto mode = cldnn::eltwise_mode::logic_or;
    CreateElementwiseOp(p, op, mode);
}

// v1::LogicalXor -> eltwise logic_xor
void CreateLogicalXorOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalXor>& op) {
    const auto mode = cldnn::eltwise_mode::logic_xor;
    CreateElementwiseOp(p, op, mode);
}
// Translates an ngraph v1::Power node.
//
// When the exponent (input 1) is a Constant holding exactly one element, the operation is
// emitted as a unary `pow` activation with that value as parameter; otherwise it is emitted
// as a two-input eltwise `pow`. Throws if the single constant value cannot be read.
void CreatePowerOp(Program& p, const std::shared_ptr<ngraph::op::v1::Power>& op) {
    p.ValidateInputs(op, {2});

    const auto exponentConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    const bool hasSingleValueExponent =
        exponentConst && ngraph::shape_size(exponentConst->get_output_shape(0)) == 1;

    if (!hasSingleValueExponent) {
        CreateElementwiseOp(p, op, cldnn::eltwise_mode::pow);
        return;
    }

    float exponent;
    if (!ngraph::op::util::get_single_value(exponentConst, exponent))
        THROW_IE_EXCEPTION << "Invalid parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::pow, {exponent});
}
// Modulo element-wise operations; both forward to CreateElementwiseOp.

// v1::FloorMod -> eltwise floor_mod
void CreateFloorModOp(Program& p, const std::shared_ptr<ngraph::op::v1::FloorMod>& op) {
    const auto mode = cldnn::eltwise_mode::floor_mod;
    CreateElementwiseOp(p, op, mode);
}

// v1::Mod -> eltwise mod
void CreateModOp(Program& p, const std::shared_ptr<ngraph::op::v1::Mod>& op) {
    const auto mode = cldnn::eltwise_mode::mod;
    CreateElementwiseOp(p, op, mode);
}
// Factory registrations for the element-wise operations of this file.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this chunk; presumably it binds each
// Create<Name>Op function to the ngraph op of the given opset version - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v1, Add);
REGISTER_FACTORY_IMPL(v1, Multiply);
REGISTER_FACTORY_IMPL(v1, Maximum);
REGISTER_FACTORY_IMPL(v1, Minimum);
REGISTER_FACTORY_IMPL(v1, Subtract);
REGISTER_FACTORY_IMPL(v1, Divide);
REGISTER_FACTORY_IMPL(v0, SquaredDifference);
REGISTER_FACTORY_IMPL(v1, Equal);
REGISTER_FACTORY_IMPL(v1, NotEqual);
REGISTER_FACTORY_IMPL(v1, Less);
REGISTER_FACTORY_IMPL(v1, LessEqual);
REGISTER_FACTORY_IMPL(v1, Greater);
REGISTER_FACTORY_IMPL(v1, GreaterEqual);
REGISTER_FACTORY_IMPL(v1, LogicalAnd);
REGISTER_FACTORY_IMPL(v1, LogicalOr);
REGISTER_FACTORY_IMPL(v1, LogicalXor);
REGISTER_FACTORY_IMPL(v1, Power);
REGISTER_FACTORY_IMPL(v1, FloorMod);
REGISTER_FACTORY_IMPL(v1, Mod);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,166 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/embedding_segments_sum.hpp"
#include "ngraph/op/embeddingbag_offsets_sum.hpp"
#include "ngraph/op/embeddingbag_packedsum.hpp"
#include "api/embedding_bag.hpp"
#include "api/reorder.hpp"
#include "transformations/utils/utils.hpp"
namespace CLDNNPlugin {
void CreateEmbeddingBagOffsetsSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingBagOffsetsSum>& op) {
p.ValidateInputs(op, {3, 4, 5});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
int32_t defaultIndex = -1;
if (inputPrimitives.size() > 3) {
auto index_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(3));
if (!index_node) {
THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
}
float val;
if (ngraph::shape_size(index_node->get_output_shape(0)) != 1 || !ngraph::op::util::get_single_value(index_node, val))
THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
defaultIndex = static_cast<int32_t>(val);
inputPrimitives.erase(inputPrimitives.begin() + 3); // Remove "default_index"
}
std::vector<cldnn::primitive_id> reorderedInputs;
reorderedInputs.resize(inputPrimitives.size());
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices inputs,
// so we need additional reorders if they are provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputPrimitives[portIndex],
targetFormat,
cldnn::data_types::i32);
p.AddPrimitive(preprocessPrim);
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
reorderedInputs[portIndex] = (reorderPrimName);
} else {
reorderedInputs[portIndex] = inputPrimitives[portIndex];
}
}
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::offsets_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
defaultIndex);
p.AddPrimitive(embeddingBagPrim);
p.AddPrimitiveToProfiler(op);
}
void CreateEmbeddingBagPackedSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingBagPackedSum>& op) {
p.ValidateInputs(op, {2, 3});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
std::vector<cldnn::primitive_id> reorderedInputs;
reorderedInputs.resize(inputPrimitives.size());
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if ((portIndex == 1) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices input,
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputPrimitives[portIndex],
targetFormat,
cldnn::data_types::i32);
p.AddPrimitive(preprocessPrim);
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
reorderedInputs[portIndex] = (reorderPrimName);
} else {
reorderedInputs[portIndex] = inputPrimitives[portIndex];
}
}
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::packed_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)));
p.AddPrimitive(embeddingBagPrim);
p.AddPrimitiveToProfiler(op);
}
void CreateEmbeddingSegmentsSumOp(Program& p, const std::shared_ptr<ngraph::op::v3::EmbeddingSegmentsSum>& op) {
p.ValidateInputs(op, {4, 5, 6});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
inputPrimitives.erase(inputPrimitives.begin() + 3); // Remove "num_segments"
int32_t defaultIndex = -1;
// port of default_index is 4 by default, but we removed "num_segments" above, so now it's equal to 3
if (inputPrimitives.size() > 3) {
auto index_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(4));
if (!index_node) {
THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
}
float val;
if (ngraph::shape_size(index_node->get_output_shape(0)) != 1 || !ngraph::op::util::get_single_value(index_node, val))
THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
defaultIndex = static_cast<int32_t>(val);
inputPrimitives.erase(inputPrimitives.begin() + 3); // Remove "default_index"
}
std::vector<cldnn::primitive_id> reorderedInputs;
reorderedInputs.resize(inputPrimitives.size());
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if (((portIndex == 1) || (portIndex == 2)) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for indices inputs,
// so we need additional reorders if they are provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputPrimitives[portIndex],
targetFormat,
cldnn::data_types::i32);
p.AddPrimitive(preprocessPrim);
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
reorderedInputs[portIndex] = (reorderPrimName);
} else {
reorderedInputs[portIndex] = inputPrimitives[portIndex];
}
}
auto embeddingBagPrim = cldnn::embedding_bag(layerName,
reorderedInputs,
cldnn::embedding_bag::segments_sum,
CldnnTensorFromIEDims(op->get_output_shape(0)),
defaultIndex);
p.AddPrimitive(embeddingBagPrim);
p.AddPrimitiveToProfiler(op);
}
// Factory registrations for the embedding operations of this file.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this chunk; presumably it binds each
// Create<Name>Op function to the ngraph op of the given opset version - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v3, EmbeddingBagOffsetsSum);
REGISTER_FACTORY_IMPL(v3, EmbeddingBagPackedSum);
REGISTER_FACTORY_IMPL(v3, EmbeddingSegmentsSum);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,49 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/extractimagepatches.hpp"
#include "api/extract_image_patches.hpp"
namespace CLDNNPlugin {
// Returns the lower-case name ("same_upper", "same_lower" or "valid") for the given pad type.
// Any other pad type results in an exception.
static inline std::string PadToString(ngraph::op::PadType pad) {
    using ngraph::op::PadType;

    if (pad == PadType::SAME_UPPER)
        return "same_upper";
    if (pad == PadType::SAME_LOWER)
        return "same_lower";
    if (pad == PadType::VALID)
        return "valid";

    THROW_IE_EXCEPTION << "Unsupported pad type in ExtractImagePatches primitive " << pad;
    return "";
}
// Builds a clDNN extract_image_patches primitive from an ngraph v3::ExtractImagePatches node.
// Sizes, strides and rates are copied element-wise into uint32_t vectors; the auto-pad mode is
// passed as its string name and the output dimensions come from the node's first output.
void CreateExtractImagePatchesOp(Program& p, const std::shared_ptr<ngraph::op::v3::ExtractImagePatches>& op) {
    p.ValidateInputs(op, {1});

    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primitiveName = layer_type_name_ID(op);

    const auto& patchSizes = op->get_sizes();
    const auto& patchStrides = op->get_strides();
    const auto& patchRates = op->get_rates();

    const std::vector<uint32_t> sizes(patchSizes.begin(), patchSizes.end());
    const std::vector<uint32_t> strides(patchStrides.begin(), patchStrides.end());
    const std::vector<uint32_t> rates(patchRates.begin(), patchRates.end());
    const std::string autoPadName = PadToString(op->get_auto_pad());

    p.AddPrimitive(cldnn::extract_image_patches(primitiveName,
                                                inputIds[0],
                                                sizes,
                                                strides,
                                                rates,
                                                autoPadName,
                                                CldnnTensorFromIEDims(op->get_output_shape(0))));
    p.AddPrimitiveToProfiler(op);
}
// NOTE(review): macro defined outside this chunk; presumably registers CreateExtractImagePatchesOp
// as the factory for ngraph::op::v3::ExtractImagePatches - confirm at the macro definition.
REGISTER_FACTORY_IMPL(v3, ExtractImagePatches);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,42 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/fake_quantize.hpp"
#include "api/quantize.hpp"
namespace CLDNNPlugin {
// Builds a clDNN quantize primitive from an ngraph v0::FakeQuantize node.
// The node must have exactly five inputs, forwarded in order:
// data, input_low, input_high, output_low, output_high.
void CreateFakeQuantizeOp(Program& p, const std::shared_ptr<ngraph::op::v0::FakeQuantize>& op) {
    p.ValidateInputs(op, {5});

    const std::string primitiveName = layer_type_name_ID(op);
    const auto ids = p.GetInputPrimitiveIDs(op);

    const int levelCount = static_cast<int>(op->get_levels());
    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));

    p.AddPrimitive(cldnn::quantize(primitiveName,
                                   ids[0],   // data
                                   ids[1],   // input_low
                                   ids[2],   // input_high
                                   ids[3],   // output_low
                                   ids[4],   // output_high
                                   levelCount,
                                   outputType));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, FakeQuantize);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,54 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/gather_tree.hpp"
#include "api/gather_tree.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v1::GatherTree node (four inputs) into a cldnn::gather_tree primitive.
// Any input whose element type maps to i64 is first routed through a reorder that converts it to i32.
void CreateGatherTreeOp(Program& p, const std::shared_ptr<ngraph::op::v1::GatherTree>& op) {
    p.ValidateInputs(op, {4});
    const auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    const std::string layerName = layer_type_name_ID(op);

    std::vector<cldnn::primitive_id> gatherTreeInputs;
    gatherTreeInputs.reserve(inputPrimitives.size());

    for (size_t port = 0; port < inputPrimitives.size(); ++port) {
        const auto portType = DataTypeFromPrecision(op->get_input_element_type(port));
        if (portType != cldnn::data_types::i64) {
            // Input can be consumed as is.
            gatherTreeInputs.push_back(inputPrimitives[port]);
            continue;
        }

        // clDNN primitive does not support i64 inputs,
        // so an extra reorder converts this input to i32.
        const auto reorderId = inputPrimitives[port] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
        const auto reorderFormat = DefaultFormatForDims(op->get_input_shape(port).size());
        auto convertPrim = cldnn::reorder(reorderId,
                                          inputPrimitives[port],
                                          reorderFormat,
                                          cldnn::data_types::i32);
        p.AddPrimitive(convertPrim);
        p.AddInnerPrimitiveToProfiler(reorderId, layerName, op);
        gatherTreeInputs.push_back(reorderId);
    }

    cldnn::gather_tree gatherTreePrim(layerName,
                                      gatherTreeInputs[0],
                                      gatherTreeInputs[1],
                                      gatherTreeInputs[2],
                                      gatherTreeInputs[3]);

    p.AddPrimitive(gatherTreePrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, GatherTree);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,103 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/gather.hpp"
#include "api/gather.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Converts an IE gather axis index into the cldnn gather axis for the given input format.
// Axes 0 and 1 always map to batch and feature; the remaining values are resolved per format
// (bfyx, bfzyx, bfwzyx). Unknown axes or formats raise an IE exception.
//
// NOTE(review): the negative-axis tables do not mirror the positive ones (e.g. for bfyx, -1 maps
// to along_y while 3 maps to along_x). Presumably they target tensors of lower rank than the
// format - confirm against the callers before relying on negative axes.
static cldnn::gather::gather_axis GetGatherAxis(int32_t axis, cldnn::format inputFormat) {
    using gather_axis = cldnn::gather::gather_axis;

    // Batch and feature do not depend on the format.
    switch (axis) {
        case 0: return gather_axis::along_b;
        case 1: return gather_axis::along_f;
        default: break;
    }

    if (inputFormat == cldnn::format::bfyx) {
        switch (axis) {
            case 2: return gather_axis::along_y;
            case 3: return gather_axis::along_x;
            case -1: return gather_axis::along_y;
            case -2: return gather_axis::along_f;
            case -3: return gather_axis::along_b;
            default: THROW_IE_EXCEPTION << "Unsupported gather axis: " << axis;
        }
    }

    if (inputFormat == cldnn::format::bfzyx) {
        switch (axis) {
            case 2: return gather_axis::along_z;
            case 3: return gather_axis::along_y;
            case 4: return gather_axis::along_x;
            case -1: return gather_axis::along_y;
            case -2: return gather_axis::along_z;
            case -3: return gather_axis::along_f;
            case -4: return gather_axis::along_b;
            default: THROW_IE_EXCEPTION << "Unsupported gather axis: " << axis;
        }
    }

    if (inputFormat == cldnn::format::bfwzyx) {
        switch (axis) {
            case 2: return gather_axis::along_w;
            case 3: return gather_axis::along_z;
            case 4: return gather_axis::along_y;
            case 5: return gather_axis::along_x;
            case -1: return gather_axis::along_y;
            case -2: return gather_axis::along_z;
            case -3: return gather_axis::along_w;
            case -4: return gather_axis::along_f;
            case -5: return gather_axis::along_b;
            default: THROW_IE_EXCEPTION << "Unsupported gather axis: " << axis;
        }
    }

    // None of the supported formats matched.
    THROW_IE_EXCEPTION << "Unsupported gather axis: " << axis;
}
// Translates an nGraph v1::Gather node (two or three inputs) into a cldnn::gather primitive.
// Inputs whose element type maps to i64 are converted to i32 through an extra reorder; only the
// first two (possibly reordered) inputs are passed to the gather primitive itself.
void CreateGatherOp(Program& p, const std::shared_ptr<ngraph::op::v1::Gather>& op) {
    p.ValidateInputs(op, {2, 3});
    const auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    const std::string layerName = layer_type_name_ID(op);

    const int32_t axis = static_cast<int32_t>(op->get_axis());

    std::vector<cldnn::primitive_id> gatherInputs;
    gatherInputs.reserve(inputPrimitives.size());

    for (size_t port = 0; port < inputPrimitives.size(); ++port) {
        const auto portType = DataTypeFromPrecision(op->get_input_element_type(port));
        if (portType != cldnn::data_types::i64) {
            gatherInputs.push_back(inputPrimitives[port]);
            continue;
        }

        // clDNN primitive does not support i64 inputs,
        // so an extra reorder converts this input to i32.
        const auto reorderId = inputPrimitives[port] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
        const auto reorderFormat = DefaultFormatForDims(op->get_input_shape(port).size());
        auto convertPrim = cldnn::reorder(reorderId,
                                          inputPrimitives[port],
                                          reorderFormat,
                                          cldnn::data_types::i32);
        p.AddPrimitive(convertPrim);
        p.AddInnerPrimitiveToProfiler(reorderId, layerName, op);
        gatherInputs.push_back(reorderId);
    }

    const auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size());
    // The axis is resolved against the default format for the rank of the data input.
    const auto gatherAxis = GetGatherAxis(axis, DefaultFormatForDims(op->get_input_shape(0).size()));
    const auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));

    cldnn::gather gatherPrim(layerName,
                             gatherInputs[0],
                             gatherInputs[1],
                             gatherAxis,
                             outLayout,
                             outTensor);

    p.AddPrimitive(gatherPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, Gather);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,30 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/grn.hpp"
#include "api/grn.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v0::GRN node (one input) into a cldnn::grn primitive,
// forwarding the op's bias and output element type.
void CreateGRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::GRN>& op) {
    p.ValidateInputs(op, {1});
    const auto inputs = p.GetInputPrimitiveIDs(op);
    const std::string layerName = layer_type_name_ID(op);

    const auto outputType = DataTypeFromPrecision(op->get_output_element_type(0));
    cldnn::grn grnPrim(layerName, inputs[0], op->get_bias(), outputType);

    p.AddPrimitive(grnPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, GRN);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,203 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "caseless.hpp"
#include "ngraph/op/interpolate.hpp"
#include "ngraph/op/constant.hpp"
#include "api/resample.hpp"
namespace CLDNNPlugin {
// Maps the nGraph v4::Interpolate coordinate transformation mode onto the cldnn enumerator of
// the same name. Values outside the listed enumerators raise an IE exception.
static cldnn::coordinate_transformation_mode GetCoordinateTransformationMode(ngraph::op::v4::Interpolate::CoordinateTransformMode mode) {
    using NgMode = ngraph::op::v4::Interpolate::CoordinateTransformMode;
    using ClMode = cldnn::coordinate_transformation_mode;

    switch (mode) {
        case NgMode::half_pixel:           return ClMode::half_pixel;
        case NgMode::pytorch_half_pixel:   return ClMode::pytorch_half_pixel;
        case NgMode::asymmetric:           return ClMode::asymmetric;
        case NgMode::tf_half_pixel_for_nn: return ClMode::tf_half_pixel_for_nn;
        case NgMode::align_corners:        return ClMode::align_corners;
    }

    THROW_IE_EXCEPTION << "Unknown coordinate transformation mode: " << static_cast<int>(mode);
}
// Maps the nGraph v4::Interpolate nearest-rounding mode onto the cldnn enumerator of the same
// name. Values outside the listed enumerators raise an IE exception.
static cldnn::nearest_mode GetNearestMode(ngraph::op::v4::Interpolate::NearestMode mode) {
    using NgMode = ngraph::op::v4::Interpolate::NearestMode;
    using ClMode = cldnn::nearest_mode;

    switch (mode) {
        case NgMode::round_prefer_floor: return ClMode::round_prefer_floor;
        case NgMode::round_prefer_ceil:  return ClMode::round_prefer_ceil;
        case NgMode::floor:              return ClMode::floor;
        case NgMode::ceil:               return ClMode::ceil;
        case NgMode::simple:             return ClMode::simple;
    }

    THROW_IE_EXCEPTION << "Unknown nearest mode: " << static_cast<int>(mode);
}
// Maps the nGraph v4::Interpolate shape calculation mode (sizes / scales) onto the cldnn
// enumerator of the same name. Any other value raises an IE exception.
static cldnn::shape_calculation_mode GetShapeCalculationMode(ngraph::op::v4::Interpolate::ShapeCalcMode mode) {
    using NgMode = ngraph::op::v4::Interpolate::ShapeCalcMode;
    using ClMode = cldnn::shape_calculation_mode;

    switch (mode) {
        case NgMode::sizes:
            return ClMode::sizes;
        case NgMode::scales:
            return ClMode::scales;
    }

    THROW_IE_EXCEPTION << "Unknown shape calculation mode: " << static_cast<int>(mode);
}
// Maps the nGraph v4::Interpolate interpolation mode onto a cldnn resample type.
// Note that nGraph 'linear' is translated to cldnn 'caffe_bilinear'; the other modes keep their
// names. Values outside the listed enumerators raise an IE exception.
static cldnn::resample_type GetResampleType(ngraph::op::v4::Interpolate::InterpolateMode mode) {
    using NgMode = ngraph::op::v4::Interpolate::InterpolateMode;
    using ClType = cldnn::resample_type;

    switch (mode) {
        case NgMode::nearest:
            return ClType::nearest;
        case NgMode::linear:
            return ClType::caffe_bilinear;
        case NgMode::linear_onnx:
            return ClType::linear_onnx;
        case NgMode::cubic:
            return ClType::cubic;
    }

    THROW_IE_EXCEPTION << "Unknown interpolation mode: " << static_cast<int>(mode);
}
// Converts an IE axis index (negative values count from the end) of a tensor with `sz`
// dimensions into the matching cldnn resample axis.
// Raises an IE exception when the axis is out of range or has no cldnn counterpart.
static cldnn::resample::resample_axis GetInterpolationAxis(int32_t axis, uint32_t sz) {
    using resample_axis = cldnn::resample::resample_axis;

    if (axis < 0)
        axis += sz;
    if (axis < 0 || static_cast<uint32_t>(axis) >= sz)
        THROW_IE_EXCEPTION << "Interpolate axis is not correspond to number of dimensions";

    // Difference in dimension ordering between IE and clDNN:
    // spatial dimensions (everything after batch and feature) are reversed.
    uint32_t cldnn_axis = axis;
    if (axis >= 2) {
        // Default and minimum number of dimensions is 4.
        const uint32_t rank = std::max(sz, 4u);
        // Same as (rank - 2) - (axis - 2) - 1 + 2.
        cldnn_axis = rank + 1 - static_cast<uint32_t>(axis);
    }

    // cldnn axis order once the spatial dimensions are reversed.
    static const resample_axis kAxes[] = {
        resample_axis::along_b,
        resample_axis::along_f,
        resample_axis::along_x,
        resample_axis::along_y,
        resample_axis::along_z,
        resample_axis::along_w,
    };
    if (cldnn_axis < sizeof(kAxes) / sizeof(kAxes[0]))
        return kAxes[cldnn_axis];

    THROW_IE_EXCEPTION << "Unsupported Interpolate axis: " << axis;
}
// Translates an nGraph v4::Interpolate node (three or four inputs) into a cldnn::resample primitive.
// The scales input (index 2) and, when present, the axes input (index 3) must be Constant nodes;
// otherwise an IE exception is raised. Without an axes input every input dimension is used.
void CreateInterpolateOp(Program& p, const std::shared_ptr<ngraph::op::v4::Interpolate>& op) {
    p.ValidateInputs(op, {3, 4});
    const auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    const std::string layerName = layer_type_name_ID(op);

    static const size_t SCALES_INDEX = 2;
    static const size_t AXES_INDEX = 3;

    const auto attrs = op->get_attrs();
    const auto inputRank = op->get_input_shape(0).size();
    const auto outDims = op->get_output_shape(0).size();
    const auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));

    // Pads are extended with zeros up to max(output rank, 4) entries.
    const size_t padRank = std::max<size_t>(outDims, 4);
    std::vector<int> pad_begin(attrs.pads_begin.begin(), attrs.pads_begin.end());
    std::vector<int> pad_end(attrs.pads_end.begin(), attrs.pads_end.end());
    if (pad_begin.size() < padRank)
        pad_begin.resize(padRank, 0);
    if (pad_end.size() < padRank)
        pad_end.resize(padRank, 0);

    const int antialias = attrs.antialias;
    const float cube_coeff = attrs.cube_coeff;

    const auto cldnnSampleType = GetResampleType(attrs.mode);
    const auto shapeCalcMode = GetShapeCalculationMode(attrs.shape_calculation_mode);
    const auto coordTransMode = GetCoordinateTransformationMode(attrs.coordinate_transformation_mode);
    const auto nearestMode = GetNearestMode(attrs.nearest_mode);

    // Scales have to be known at compile time.
    const auto scalesNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(SCALES_INDEX));
    if (scalesNode == nullptr) {
        THROW_IE_EXCEPTION << "Unsupported parameter node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }
    const std::vector<float> scales = scalesNode->cast_vector<float>();

    std::vector<cldnn::resample::resample_axis> axes;
    if (op->get_input_size() != 4) {
        // No explicit axes input: interpolate along every input dimension.
        for (size_t dim = 0; dim < inputRank; ++dim) {
            axes.push_back(GetInterpolationAxis(static_cast<int32_t>(dim), inputRank));
        }
    } else {
        const auto axesNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(AXES_INDEX));
        if (axesNode == nullptr) {
            THROW_IE_EXCEPTION << "Unsupported parameter node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
        for (const auto ieAxis : axesNode->cast_vector<int32_t>()) {
            axes.push_back(GetInterpolationAxis(ieAxis, inputRank));
        }
    }

    if (axes.size() != scales.size())
        THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect axes and scales should be the same size";

    // Pair every axis with its scale; a repeated axis keeps the last scale assigned to it.
    cldnn::resample::AxesAndScales axesAndScales;
    for (size_t idx = 0; idx < axes.size(); ++idx) {
        axesAndScales[axes[idx]] = scales[idx];
    }

    if (cldnnSampleType == cldnn::resample_type::linear_onnx) {
        using resample_axis = cldnn::resample::resample_axis;

        if (inputRank != 2 && inputRank != 4)
            THROW_IE_EXCEPTION << "mode 'linear_onnx' supports only 2D or 4D tensors";
        if (axes.size() != 2 && inputRank != axes.size())
            THROW_IE_EXCEPTION << "mode 'linear_onnx' supports only axes with size 2 or equal to input rank";

        // After the checks above axes holds at least two entries.
        bool correctAxes = false;
        if (axes.size() == 4 && inputRank == 4) {
            correctAxes = axes[0] == resample_axis::along_b &&
                          axes[1] == resample_axis::along_f &&
                          axes[2] == resample_axis::along_y &&
                          axes[3] == resample_axis::along_x;
        } else {
            const bool batchFeature = axes[0] == resample_axis::along_b && axes[1] == resample_axis::along_f;
            const bool spatialYX = axes[0] == resample_axis::along_y && axes[1] == resample_axis::along_x;
            correctAxes = batchFeature || spatialYX;
        }

        if (!correctAxes)
            THROW_IE_EXCEPTION <<
                "mode 'linear_onnx' supports only case when axes = {2, 3} or "
                "axes = {0, 1} or axes = {0, 1, 2, 3}";
    }

    cldnn::resample resamplePrim(layerName,
                                 inputPrimitives[0],
                                 outTensor,
                                 axesAndScales,
                                 pad_begin,
                                 pad_end,
                                 antialias,
                                 cube_coeff,
                                 cldnnSampleType,
                                 shapeCalcMode,
                                 coordTransMode,
                                 nearestMode);

    p.AddPrimitive(resamplePrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v4, Interpolate);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,49 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/constant.hpp"
#include "api/lrn.hpp"
namespace CLDNNPlugin {
// Selects the cldnn LRN normalization region from the LRN axes:
// a single axis equal to 1 means "across channel", anything else "within channel".
// The axes are taken by const reference to avoid copying the vector on every call.
static cldnn::lrn_norm_region GetNormRegion(const std::vector<int64_t>& axis_value) {
    if (axis_value.size() == 1 && axis_value[0] == 1) {
        return cldnn::lrn_norm_region_across_channel;
    } else {
        return cldnn::lrn_norm_region_within_channel;
    }
}
// Translates an nGraph v0::LRN node (data + axes inputs) into a cldnn::lrn primitive.
// The axes input must be a Constant; otherwise an IE exception is raised.
void CreateLRNOp(Program& p, const std::shared_ptr<ngraph::op::v0::LRN>& op) {
    p.ValidateInputs(op, {2});
    const auto inputs = p.GetInputPrimitiveIDs(op);
    const std::string layerName = layer_type_name_ID(op);

    const auto axesNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    if (axesNode == nullptr) {
        THROW_IE_EXCEPTION << "Unsupported axes node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }
    // The axes decide between across-channel and within-channel normalization.
    const auto normRegion = GetNormRegion(axesNode->cast_vector<int64_t>());

    const auto localSize = op->get_nsize();
    const float bias = static_cast<float>(op->get_bias());
    const float alpha = static_cast<float>(op->get_alpha());
    const float beta = static_cast<float>(op->get_beta());

    cldnn::lrn lrnPrim(layerName,
                       inputs[0],
                       localSize,
                       bias,
                       alpha,
                       beta,
                       normRegion);

    p.AddPrimitive(lrnPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, LRN);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,248 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/matmul.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "api/gemm.hpp"
#include "api/fully_connected.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
#include "api/permute.hpp"
namespace CLDNNPlugin {
/*
 * get_aligned_shapes aligns two MatMul input shapes so that they have the same rank and
 * the same batch dimensions (the last two dimensions are not compared).
 *
 * Steps:
 *   1. the shorter shape is extended by prepending ones;
 *   2. the last two dimensions are swapped for an input flagged as transposed on the op;
 *   3. every batch dimension is broadcast to the larger of the two values.
 *
 * Throws an IE exception when
 *   - both shapes have fewer than two dimensions (there is no matrix part to align), or
 *   - a batch dimension differs and neither side is 1, e.g. [2, 32, 64] and [3, 64, 64].
 *
 * Returns the pair {aligned shape_a, aligned shape_b}.
 */
static std::pair<ngraph::Shape, ngraph::Shape> get_aligned_shapes(const ngraph::Shape& shape_a,
                                                                  const ngraph::Shape& shape_b,
                                                                  const std::shared_ptr<ngraph::op::v0::MatMul>& matmul) {
    ngraph::Shape shape_a_aligned(shape_a), shape_b_aligned(shape_b);
    const size_t max_size = std::max(shape_a_aligned.size(), shape_b_aligned.size());

    // Without this guard `max_size - 2` below wraps around (unsigned arithmetic) and
    // `end() - 2` points before the first element, i.e. out-of-bounds accesses.
    if (max_size < 2) {
        THROW_IE_EXCEPTION << "MatMul " << matmul->get_friendly_name() << " shapes are inconsistent.";
    }

    // Extend the shorter shape with leading ones.
    shape_a_aligned.insert(shape_a_aligned.begin(), max_size - shape_a_aligned.size(), static_cast<size_t>(1));
    shape_b_aligned.insert(shape_b_aligned.begin(), max_size - shape_b_aligned.size(), static_cast<size_t>(1));

    if (matmul->get_transpose_a()) {
        std::swap(*(shape_a_aligned.end() - 1), *(shape_a_aligned.end() - 2));
    }
    if (matmul->get_transpose_b()) {
        std::swap(*(shape_b_aligned.end() - 1), *(shape_b_aligned.end() - 2));
    }

    // Broadcast batch dimensions; the trailing two (matrix) dimensions are left untouched.
    for (size_t i = 0; i < max_size - 2; ++i) {
        if (shape_a_aligned[i] != shape_b_aligned[i] && shape_a_aligned[i] > 1 && shape_b_aligned[i] > 1) {
            THROW_IE_EXCEPTION << "Shapes can't be aligned: " << shape_a_aligned << " " << shape_b_aligned;
        }
        size_t max_value = std::max(shape_a_aligned[i], shape_b_aligned[i]);
        shape_a_aligned[i] = shape_b_aligned[i] = max_value;
    }

    return {shape_a_aligned, shape_b_aligned};
}
// Translates an nGraph v0::MatMul node (two inputs) into clDNN primitives.
//
// Two lowering paths exist:
//   * fully_connected - taken when the second input is a Constant or FakeQuantize node and its
//     shape has at most two dimensions different from 1. Weights are permuted when transpose_b is
//     not set, the data input is permuted when transpose_a is set, and for aligned ranks above 3
//     both are reshaped to 2D with the result reshaped back to the op output shape.
//   * gemm - taken otherwise. Inputs are reordered/reshaped to the output rank and the transpose
//     flags are handed to the gemm primitive.
//
// Raises an IE exception for inconsistent shapes or unsupported ranks.
void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::MatMul>& op) {
    p.ValidateInputs(op, {2});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto shape_a = op->get_input_shape(0);
    auto shape_b = op->get_input_shape(1);

    bool is_fc = ngraph::is_type<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1)) ||
                 ngraph::is_type<ngraph::op::v0::FakeQuantize>(op->get_input_node_shared_ptr(1));
    is_fc &= std::count_if(shape_b.begin(), shape_b.end(), [](size_t x) { return x != 1; }) <= 2;

    if (is_fc) {
        ngraph::Shape shape_a_aligned, shape_b_aligned;
        std::tie(shape_a_aligned, shape_b_aligned) = get_aligned_shapes(shape_a, shape_b, op);
        if (shape_a_aligned.size() < 2 || shape_b_aligned.size() < 2) {
            THROW_IE_EXCEPTION << "MatMul " << op->get_friendly_name() << " shapes are inconsistent.";
        }
        size_t K = *(shape_a_aligned.end() - 1);
        size_t O = *(shape_b_aligned.end() - 1);

        auto inputName = inputPrimitives[0];
        auto weightsName = inputPrimitives[1];

        // Weights normalization
        if (!op->get_transpose_b()) {
            ngraph::Shape output_shape = shape_b;
            std::vector<uint16_t> transpose_order(output_shape.size());
            std::iota(transpose_order.begin(), transpose_order.end(), 0);
            std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2));

            // The permute order is padded with identity entries up to 4 dimensions.
            for (auto o = transpose_order.size(); o < 4; o++)
                transpose_order.push_back((uint16_t)o);

            auto permuteName = op->get_friendly_name() + "/transpose_b";
            auto permutePrim = cldnn::permute(permuteName,
                                              weightsName,
                                              transpose_order);
            p.AddPrimitive(permutePrim);
            p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
            weightsName = permuteName;
        }

        // Input normalization
        if (op->get_transpose_a()) {
            ngraph::Shape output_shape = shape_a;
            std::vector<uint16_t> transpose_order(output_shape.size());
            std::iota(transpose_order.begin(), transpose_order.end(), 0);
            std::swap(*(transpose_order.end() - 1), *(transpose_order.end() - 2));

            // The permute order is padded with identity entries up to 4 dimensions.
            for (auto o = transpose_order.size(); o < 4; o++)
                transpose_order.push_back((uint16_t)o);

            auto permuteName = op->get_friendly_name() + "/transpose_a";
            auto permutePrim = cldnn::permute(permuteName,
                                              inputName,
                                              transpose_order);
            p.AddPrimitive(permutePrim);
            p.AddInnerPrimitiveToProfiler(permuteName, layerName, op);
            inputName = permuteName;
        }

        bool reshape_fc = shape_a_aligned.size() > 3;

        // Adds a reshape of `srcName` to {total / features, features} and returns the reshape id.
        auto reshape_to_2d = [&](const ngraph::Shape& shape, std::string srcName, size_t features, std::string suffix) -> std::string {
            // The initial value must be size_t: with a plain `1` the product is accumulated in
            // `int` and gets truncated for shapes with many elements.
            auto total = std::accumulate(shape.begin(), shape.end(), size_t{1}, std::multiplies<size_t>());
            std::vector<size_t> reshapeSize = { total / features, features };

            if (total != reshapeSize[0] * reshapeSize[1])
                THROW_IE_EXCEPTION << "Inconsistent reshape in Matmul op: " << op->get_friendly_name();

            auto reshapeInName = op->get_friendly_name() + suffix;
            auto reshapeInPrim = cldnn::reshape(reshapeInName, srcName, CldnnTensorFromIEDims(reshapeSize));
            p.AddPrimitive(reshapeInPrim);
            p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
            return reshapeInName;
        };

        if (reshape_fc) {
            inputName = reshape_to_2d(shape_a, inputName, shape_a.back(), "_cldnn_reshape_in");
            weightsName = reshape_to_2d(shape_b, weightsName, K, "_cldnn_reshape_weights");
        }

        auto fcPrim = cldnn::fully_connected(layerName,
                                             inputName,
                                             weightsName,
                                             "",
                                             DataTypeFromPrecision(op->get_output_element_type(0)),
                                             cldnn::padding(),
                                             op->get_output_shape(0).size());

        p.AddPrimitive(fcPrim);

        auto lastLayerName = layerName;
        if (reshape_fc) {
            // Restore the original output shape after the 2D fully connected.
            auto outputShape = CldnnTensorFromIEDims(op->get_output_shape(0));
            auto outReshapeName = layerName + "_cldnn_out_reshape";
            auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);

            p.AddPrimitive(outReshapePrim);
            p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);

            lastLayerName = outReshapeName;
        }

        p.AddPrimitiveToProfiler(op, lastLayerName);
    } else {
        auto outDims = op->get_output_shape(0);
        auto outDimsN = outDims.size();

        // Builds the tensor used for gemm inputs: the last two IE dims become spatial x/y.
        auto gemmSpecificTensor = [](const InferenceEngine::SizeVector& dims) {
            switch (dims.size()) {
            case 2: return cldnn::tensor(cldnn::spatial(dims[1], dims[0]));
            case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::spatial(dims[2], dims[1]));
            case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
            case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
            case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
            default: THROW_IE_EXCEPTION << "Invalid dimensions size(" << dims.size() << ") for Gemm layer";
            }
        };

        // Preprocess inputs
        for (size_t i = 0; i < inputPrimitives.size(); ++i) {
            auto inputDims = op->get_input_shape(i);
            auto inputDimsN = inputDims.size();

            // Add reorder if changing number of dimensions requires changing format
            auto targetFormat = DefaultFormatForDims(outDimsN);

            if (targetFormat.value != DefaultFormatForDims(inputDimsN).value) {
                auto reorderName = layerName + "_cldnn_in" + std::to_string(i) + "_reorder";
                auto targetDatatype = DataTypeFromPrecision(op->get_output_element_type(0));
                auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype);

                p.AddPrimitive(reorderPrim);
                p.AddInnerPrimitiveToProfiler(reorderName, layerName, op);

                inputPrimitives[i] = reorderName;
            }

            // Reshape input if they differ or gemm specific shape matches default one
            if (inputDimsN != outDimsN || inputDimsN < 4) {
                auto reshapeName = layerName + "_cldnn_in" + std::to_string(i) + "_reshape";

                // Extend input dimensions by prepending ones
                inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul);

                auto targetShape = gemmSpecificTensor(inputDims);

                auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape);

                p.AddPrimitive(reshapePrim);
                p.AddInnerPrimitiveToProfiler(reshapeName, layerName, op);

                inputPrimitives[i] = reshapeName;
            }
        }

        // Add actual gemm
        auto alpha = 1.0f;
        auto beta = 0.0f;
        auto transA = op->get_transpose_a();
        auto transB = op->get_transpose_b();

        auto gemmPrim = cldnn::gemm(layerName,
                                    inputPrimitives,
                                    DataTypeFromPrecision(op->get_output_element_type(0)),
                                    transA,
                                    transB,
                                    alpha,
                                    beta);

        p.AddPrimitive(gemmPrim);

        auto lastLayerName = layerName;

        // Reshape output if gemm specific shape does not match default one
        if (outDimsN < 4) {
            auto outputShape = CldnnTensorFromIEDims(outDims);
            auto outReshapeName = layerName + "_cldnn_out_reshape";
            auto outReshapePrim = cldnn::reshape(outReshapeName, layerName, outputShape);

            p.AddPrimitive(outReshapePrim);
            p.AddInnerPrimitiveToProfiler(outReshapeName, layerName, op);

            lastLayerName = outReshapeName;
        }

        p.AddPrimitiveToProfiler(op, lastLayerName);
    }
}
REGISTER_FACTORY_IMPL(v0, MatMul);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,38 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/mvn.hpp"
#include "api/mvn.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v0::MVN node (one input) into a cldnn::mvn primitive.
void CreateMVNOp(Program& p, const std::shared_ptr<ngraph::op::v0::MVN>& op) {
    p.ValidateInputs(op, {1});
    const auto inputs = p.GetInputPrimitiveIDs(op);
    const std::string layerName = layer_type_name_ID(op);

    // FIXME: op->get_across_channels(); doesn't work for some reason. Is it expected?
    // The flag is derived instead from whether axis 1 is among the reduction axes.
    const size_t channelAxis = 1;
    const ngraph::AxisSet reductionAxes = op->get_reduction_axes();
    const bool across_channels = reductionAxes.count(channelAxis) != 0;

    const bool normalize_variance = op->get_normalize_variance();
    const float eps = op->get_eps();

    cldnn::mvn mvnPrim(layerName,
                       inputs[0],
                       across_channels,
                       normalize_variance,
                       eps);

    p.AddPrimitive(mvnPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, MVN);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,163 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/non_max_suppression.hpp"
#include <ngraph/opsets/opset3.hpp>
#include <ngraph_ops/nms_ie_internal.hpp>
#include "api/reorder.hpp"
#include "api/mutable_data.hpp"
#include "api/non_max_suppression.hpp"
namespace CLDNNPlugin {
// Tells whether the NonMaxSuppression box encoding is CENTER (true) or CORNER (false).
// Any other encoding raises an IE exception.
static bool GetCenterPointBox(ngraph::op::v5::NonMaxSuppression::BoxEncodingType encoding) {
    using BoxEncoding = ::ngraph::op::v5::NonMaxSuppression::BoxEncodingType;

    if (encoding == BoxEncoding::CENTER)
        return true;
    if (encoding == BoxEncoding::CORNER)
        return false;

    THROW_IE_EXCEPTION << "NonMaxSuppression layer has unsupported box encoding";
    return false;
}
// Translates the internal NonMaxSuppressionIEInternal op (2 to 6 inputs, 1 to 3 outputs) into a
// cldnn::non_max_suppression primitive.
//
// Extra outputs are passed through mutable_data primitives backed by memory allocated here:
// a "_md_write_*" primitive is handed to the NMS primitive, and a ".1" / ".2" mutable_data that
// depends on the NMS primitive is created afterwards over the same memory.
// The switches below fall through on purpose (3 outputs -> also do the 2-output work), and the
// code depends on the resulting push order of `shared_memory` and `inputPrimitives`.
void CreateNonMaxSuppressionIEInternalOp(Program& p, const std::shared_ptr<ngraph::op::internal::NonMaxSuppressionIEInternal>& op) {
p.ValidateInputs(op, {2, 3, 4, 5, 6});
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::vector<cldnn::primitive_id> reorderedInputs;
reorderedInputs.resize(inputPrimitives.size());
// Only input #2 is converted from i64 to i32; all other inputs are forwarded unchanged.
for (size_t portIndex = 0; portIndex < inputPrimitives.size(); portIndex++) {
auto inputDataType = DataTypeFromPrecision(op->get_input_element_type(portIndex));
if ((portIndex == 2) && (inputDataType == cldnn::data_types::i64)) {
// clDNN primitive supports only i32 data type for 'max_output_boxes_per_class' input
// so we need additional reorder if it's provided as i64
auto reorderPrimName = inputPrimitives[portIndex] + "_" + op->get_friendly_name() + Program::m_preProcessTag;
auto targetFormat = DefaultFormatForDims(op->get_input_shape(portIndex).size());
auto preprocessPrim = cldnn::reorder(reorderPrimName,
inputPrimitives[portIndex],
targetFormat,
cldnn::data_types::i32);
p.AddPrimitive(preprocessPrim);
p.AddInnerPrimitiveToProfiler(reorderPrimName, layer_type_name_ID(op), op);
reorderedInputs[portIndex] = (reorderPrimName);
} else {
reorderedInputs[portIndex] = inputPrimitives[portIndex];
}
}
// clDNN primitive supports only i32 as output data type
auto out_type = op->get_output_element_type(0);
if (out_type == ngraph::element::i64) {
out_type = ngraph::element::i32;
}
// First dimension of output #0; also used below as the batch size of the second output buffer.
auto outputIndices = op->get_output_shape(0)[0];
auto boxesShape = op->get_input_shape(0);
// NOTE(review): num_batches, num_boxes and num_classes are not used later in this function;
// the .at() calls still range-check the shapes - confirm whether they can be dropped.
int32_t num_batches = boxesShape.at(0);
int32_t num_boxes = boxesShape.at(1);
auto scoresShape = op->get_input_shape(1);
int32_t num_classes = scoresShape.at(1);
std::size_t num_output = op->get_output_size();
// Filled in the order: [memory for output #2 (only with 3 outputs)], memory for output #1.
std::vector<cldnn::memory> shared_memory;
switch (num_output) {
case 3: {
// Output #2: i64 is replaced by i32, layout taken from the op output shape.
auto mutable_precision_second = op->get_output_element_type(2);
if (mutable_precision_second == ngraph::element::i64) {
mutable_precision_second = ngraph::element::i32;
}
cldnn::layout mutableLayoutSecond = cldnn::layout(
DataTypeFromPrecision(mutable_precision_second),
DefaultFormatForDims(op->get_output_shape(2).size()),
CldnnTensorFromIEDims(op->get_output_shape(2)));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutSecond));
cldnn::primitive_id non_max_supression_mutable_id_w_second = layer_type_name_ID(op) + "_md_write_second";
auto nms_mutable_prim_second = cldnn::mutable_data(non_max_supression_mutable_id_w_second, shared_memory.back());
p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_second] = { op->get_friendly_name() };
p.primitiveIDs[non_max_supression_mutable_id_w_second] = non_max_supression_mutable_id_w_second;
p.AddPrimitive(nms_mutable_prim_second);
inputPrimitives.push_back(non_max_supression_mutable_id_w_second);
}
// No break above: intentional fall-through, the 3-output case also sets up output #1.
case 2: {
// Output #1: bfyx buffer of size {outputIndices, 3, 1, 1} in the op's output #1 element type.
auto mutable_precision_first = op->get_output_element_type(1);
cldnn::layout mutableLayoutFirst = cldnn::layout(
DataTypeFromPrecision(mutable_precision_first),
cldnn::format::bfyx,
cldnn::tensor(outputIndices, 3, 1, 1));
shared_memory.emplace_back(cldnn::memory::allocate(p.GetEngine(), mutableLayoutFirst));
cldnn::primitive_id non_max_supression_mutable_id_w_first = layer_type_name_ID(op) + "_md_write_first";
auto nms_mutable_prim_first = cldnn::mutable_data(non_max_supression_mutable_id_w_first, shared_memory.back());
p.primitivesToIRLayersMap[non_max_supression_mutable_id_w_first] = { op->get_friendly_name() };
p.primitiveIDs[non_max_supression_mutable_id_w_first] = non_max_supression_mutable_id_w_first;
p.AddPrimitive(nms_mutable_prim_first);
inputPrimitives.push_back(non_max_supression_mutable_id_w_first);
}
// No break above: intentional fall-through into the no-op single-output case.
case 1: break;
default: THROW_IE_EXCEPTION << "Incorrect number of output for layer: " << op->get_friendly_name();
}
// With several outputs the main primitive is registered under the ".0" suffix.
auto nonMaxSupressionLayerName = num_output > 1 ? layer_type_name_ID(op) + ".0" : layer_type_name_ID(op);
auto prim = cldnn::non_max_suppression(
nonMaxSupressionLayerName,
reorderedInputs[0],
reorderedInputs[1],
static_cast<int>(outputIndices),
op->m_center_point_box,
op->m_sort_result_descending);
prim.output_data_type = DataTypeFromPrecision(out_type);
// Optional inputs: intentional fall-through, so N inputs set every optional field up to the N-th.
switch (reorderedInputs.size()) {
case 6: prim.soft_nms_sigma = reorderedInputs[5];
case 5: prim.score_threshold = reorderedInputs[4];
case 4: prim.iou_threshold = reorderedInputs[3];
case 3: prim.num_select_per_class = reorderedInputs[2];
case 2: break;
default: THROW_IE_EXCEPTION << "Incorrect number of input primitives for layer: " << op->get_friendly_name();
}
// The write primitives were appended to inputPrimitives above: "_md_write_first" is always the
// last entry and, with 3 outputs, "_md_write_second" is the one before it. Intentional fall-through.
switch (num_output) {
case 3: prim.third_output = inputPrimitives[inputPrimitives.size() - 2];
case 2: prim.second_output = inputPrimitives[inputPrimitives.size() - 1];
default: break;
}
p.AddPrimitive(prim);
// Read-side primitives for the extra outputs, created over the same memory and listing the NMS
// primitive as their input. Intentional fall-through again.
switch (num_output) {
case 3: {
// shared_memory.front() is the output #2 buffer when three outputs exist.
cldnn::primitive_id non_max_supression_id_r_second = layer_type_name_ID(op) + ".2";
auto nms_mutable_prim_r_second = cldnn::mutable_data(non_max_supression_id_r_second, { nonMaxSupressionLayerName }, shared_memory.front());
p.primitivesToIRLayersMap[non_max_supression_id_r_second] = { op->get_friendly_name() };
p.primitiveIDs[non_max_supression_id_r_second] = non_max_supression_id_r_second;
p.AddPrimitive(nms_mutable_prim_r_second);
}
case 2: {
// shared_memory.back() is always the output #1 buffer.
cldnn::primitive_id non_max_supression_id_r_first = layer_type_name_ID(op) + ".1";
auto nms_mutable_prim_r_first = cldnn::mutable_data(non_max_supression_id_r_first, { nonMaxSupressionLayerName }, shared_memory.back());
p.primitivesToIRLayersMap[non_max_supression_id_r_first] = { op->get_friendly_name() };
p.primitiveIDs[non_max_supression_id_r_first] = non_max_supression_id_r_first;
p.AddPrimitive(nms_mutable_prim_r_first);
}
default: break;
}
p.AddPrimitiveToProfiler(nonMaxSupressionLayerName, op);
}
REGISTER_FACTORY_IMPL(internal, NonMaxSuppressionIEInternal);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,63 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/normalize_l2.hpp"
#include "ngraph/op/constant.hpp"
#include "api/normalize.hpp"
#include "api/data.hpp"
namespace CLDNNPlugin {
// Translates an ngraph NormalizeL2 (v0) node into a cldnn::normalize primitive.
// The axes input (input 1) must be a Constant; anything else is rejected.
// A one-element "scale" data primitive filled with 1.0 is created alongside, because the
// clDNN primitive takes a scale input.
void CreateNormalizeL2Op(Program& p, const std::shared_ptr<ngraph::op::v0::NormalizeL2>& op) {
    p.ValidateInputs(op, {2});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    // params
    const auto axesNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    if (axesNode == nullptr) {
        THROW_IE_EXCEPTION << "Unsupported axis node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    const auto axes = axesNode->cast_vector<size_t>();
    // Exactly one axis with value 1 selects the non-spatial mode; every other axes set
    // is handled as "across spatial".
    const bool singleAxisOne = (axes.size() == 1) && (axes[0] == 1);
    const bool across_spatial = !singleAxisOne;

    // WA for MO outputting %.6f
    float eps = op->get_eps();
    if (eps == 0.0f) {
        eps = 1e-10f;
    }

    // We create fake scale constant and fill it with ones to keep the same behavior as current primitive
    const auto outType = op->get_output_element_type(0);
    const auto onesConst = std::make_shared<ngraph::op::v0::Constant>(outType, ngraph::Shape{1}, std::vector<float>{1.0});
    cldnn::layout scaleLayout(DataTypeFromPrecision(outType), cldnn::format::bfyx, cldnn::tensor{1});
    auto scaleMem = cldnn::memory::allocate(p.GetEngine(), scaleLayout, 0, false);
    auto mapped = scaleMem.pointer<char>();  // implicitly maps buffer - unmap in destructor
    auto dst = mapped.data();

    // The constant's byte size has to match the clDNN layout exactly before copying.
    const auto byteCount = onesConst->get_output_tensor(0).size();
    if (byteCount != scaleLayout.bytes_count()) {
        THROW_IE_EXCEPTION << "Invalid scales buffer in NormalizeL2 op " << op->get_friendly_name();
    }
    std::memcpy(&dst[0], onesConst->get_data_ptr(), byteCount);

    const auto scalesName = primName + "_cldnn_input_scales";
    p.AddPrimitive(cldnn::data(scalesName, scaleMem));
    p.AddInnerPrimitiveToProfiler(scalesName, primName, op);

    p.AddPrimitive(cldnn::normalize(primName, inputIds[0], scalesName, across_spatial, eps));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, NormalizeL2);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,64 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/one_hot.hpp"
#include "api/one_hot.hpp"
namespace CLDNNPlugin {
// Translates an ngraph OneHot (v1) node into a cldnn::one_hot primitive.
// The on/off value inputs (inputs 2 and 3) must be Constants holding a single value each.
void CreateOneHotOp(Program& p, const std::shared_ptr<ngraph::op::v1::OneHot>& op) {
    p.ValidateInputs(op, {4});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    int16_t axis = op->get_axis();

    const auto onConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
    const auto offConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(3));
    if (!onConst || !offConst) {
        THROW_IE_EXCEPTION << "Unsupported on/off node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    float on_value;
    float off_value;
    // Short-circuits exactly like the original: the off value is only read if the on value was.
    const bool haveScalars = ngraph::op::util::get_single_value(onConst, on_value) &&
                             ngraph::op::util::get_single_value(offConst, off_value);
    if (!haveScalars) {
        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    const auto dims = op->get_input_shape(0);
    const int16_t inputRank = static_cast<int16_t>(dims.size());
    if (axis < -1 || axis > inputRank) {
        THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect OneHot axis value: " << axis << ". Should be between -1 and " << dims.size();
    }

    if (axis == -1) {
        // Start after the last input dimension, then step back over every trailing
        // dimension of size 1.
        axis = inputRank;
        for (auto it = dims.rbegin(); it != dims.rend() && *it == 1; ++it) {
            axis--;
        }
    }

    const auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
    const auto outType = DataTypeFromPrecision(op->get_output_element_type(0));
    p.AddPrimitive(cldnn::one_hot(primName,
                                  inputIds[0],
                                  outTensor,
                                  outType,
                                  static_cast<uint16_t>(axis),
                                  on_value,
                                  off_value));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, OneHot);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,75 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/pad.hpp"
#include "api/border.hpp"
namespace CLDNNPlugin {
// Maps an ngraph pad mode onto the corresponding clDNN border type.
// Throws for any mode that is not one of the four handled below.
static cldnn::border_type GetBorderType(ngraph::op::PadMode mode) {
    if (mode == ngraph::op::PadMode::CONSTANT)
        return cldnn::border_type::constant;
    if (mode == ngraph::op::PadMode::EDGE)
        return cldnn::border_type::edge;
    if (mode == ngraph::op::PadMode::REFLECT)
        return cldnn::border_type::mirror_101;
    if (mode == ngraph::op::PadMode::SYMMETRIC)
        return cldnn::border_type::mirror;

    THROW_IE_EXCEPTION << "Invalid border mode " << mode << " in layer ";
    // Not reached; keeps every control path returning a value.
    return cldnn::border_type::constant;
}
// Converts per-dimension pad sizes from ngraph order into the order used to build a
// cldnn::tensor: the first two entries stay in place, the remaining (spatial) entries
// are reversed.
//
// The input is padded with zeros up to four entries first. The original padded at most
// once, which left `size() - 2` to underflow as size_t for an empty input and made the
// swap loop index out of bounds. The only caller in this file builds
// `cldnn::tensor(order, 0)`, so extra trailing zeros are presumably equivalent to the
// tensor's default fill - TODO confirm against the cldnn::tensor vector constructor.
static std::vector<int32_t> GetPermuteOrder(const ngraph::CoordinateDiff& ie_order) {
    std::vector<int32_t> cldnn_order(ie_order.begin(), ie_order.end());

    // 1. Align to min. 4 sizes
    while (cldnn_order.size() < 4)
        cldnn_order.push_back(0);

    // 2. Swap spatial positions (rank is >= 4 here, so rank - 2 cannot underflow)
    const size_t rank = cldnn_order.size();
    for (size_t i = 0; i < (rank - 2) / 2; i++) {
        std::swap(cldnn_order[2 + i], cldnn_order[rank - 1 - i]);
    }

    return cldnn_order;
}
// Translates an ngraph Pad (v1) node into a cldnn::border primitive.
// The optional fourth input (pad value) must be a Constant holding a single value;
// when it is absent the pad value stays 0.
void CreatePadOp(Program& p, const std::shared_ptr<ngraph::op::v1::Pad>& op) {
    p.ValidateInputs(op, {3, 4});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    const cldnn::tensor padsBegin(GetPermuteOrder(op->get_pads_begin()), 0);
    const cldnn::tensor padsEnd(GetPermuteOrder(op->get_pads_end()), 0);

    float pad_value = 0.f;
    const bool hasPadValueInput = (op->get_input_size() == 4);
    if (hasPadValueInput) {
        const auto valueNode = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(3));
        if (valueNode == nullptr) {
            THROW_IE_EXCEPTION << "Unsupported const node type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
        if (!ngraph::op::util::get_single_value(valueNode, pad_value)) {
            THROW_IE_EXCEPTION << "Unsupported pad value in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
    }

    const cldnn::border_type borderMode = GetBorderType(op->get_pad_mode());

    p.AddPrimitive(cldnn::border(primName,
                                 inputIds[0],
                                 padsBegin,
                                 padsEnd,
                                 borderMode,
                                 pad_value));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, Pad);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,257 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/parameter.hpp"
#include "api/input_layout.hpp"
#include "api/reorder.hpp"
#include "api/data.hpp"
using namespace InferenceEngine;
namespace CLDNNPlugin {
// Creates the clDNN primitives for a network input (ngraph Parameter):
//   * an input_layout primitive (two of them, "<name>_Y" and "<name>_UV", when the input
//     uses NV12 color format and the nv12_two_inputs config flag is set);
//   * for MEAN_IMAGE preprocessing, a data primitive holding the merged mean image;
//   * a reorder primitive that applies the mean values / mean image and converts to the
//     element type of the Parameter's output.
// p.primitiveIDs maps the input name to the reorder primitive's id.
//
// Throws when the input is missing from the network inputs map, when layout/rank/precision
// are unsupported, or when the preprocessing description is inconsistent.
void CreateParameterOp(Program& p, const std::shared_ptr<ngraph::op::v0::Parameter>& op) {
    auto networkInputs = p.GetNetworkInputs();
    if (networkInputs.find(op->get_friendly_name()) == networkInputs.end()) {
        THROW_IE_EXCEPTION << "Can't find input " << op->get_friendly_name() << " in InputsDataMap";
    }

    auto inputInfo = networkInputs.at(op->get_friendly_name());
    // first create and add the input layout
    const auto inputDesc = inputInfo->getTensorDesc();
    const auto inputDims = inputDesc.getDims();
    Layout l = inputDesc.getLayout();
    Precision ip = inputDesc.getPrecision();

    cldnn::format inputFormat = cldnn::format::bfyx;
    if (Layout::BLOCKED == l && 6 == inputDims.size()) {
        inputFormat = cldnn::format::bfwzyx;
    } else {
        inputFormat = FormatFromLayout(l);
    }

    cldnn::tensor dataTensor;
    // With p.m_max_batch <= 1 the batch comes from the input dims (only for rank > 3);
    // otherwise p.m_curBatch is used.
    cldnn::tensor::value_type batch = (p.m_max_batch <= 1)
                                      ? (inputDims.size() > 3 ? TensorValue(inputDims[0]) : 1)
                                      : TensorValue(p.m_curBatch);
    switch (inputDims.size()) {
    case 6:
        dataTensor = cldnn::tensor(cldnn::batch(batch),
                                   cldnn::feature(inputDims[1]),
                                   cldnn::spatial(inputDims[5], inputDims[4], inputDims[3], inputDims[2]));
        break;
    case 5:
        if (Layout::NCDHW == l) {
            dataTensor = cldnn::tensor(cldnn::batch(batch),
                                       cldnn::feature(inputDims[1]),
                                       cldnn::spatial(inputDims[4], inputDims[3], inputDims[2]));
        } else {
            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 5D input " << inputInfo->name();
        }
        break;
    case 4:
        // NCHW/CHW and NHWC built exactly the same tensor in two separate branches; merged.
        if (Layout::NCHW == l || Layout::CHW == l || Layout::NHWC == l) {
            dataTensor = cldnn::tensor(batch,
                                       TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2]));
        } else {
            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 4D input " + inputInfo->name();
        }
        break;
    case 3:
        if (Layout::CHW == l) {
            dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[2]));
        } else {
            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 3D input " + inputInfo->name();
        }
        break;
    case 2:
        if (Layout::NCHW == l || NC == l) {
            dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, 1);
        } else {
            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") in 2D input " << inputInfo->name();
        }
        break;
    case 1:
        dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1);
        break;
    case 0:
        dataTensor = cldnn::tensor(1, 1, 1, 1);
        break;
    default: THROW_IE_EXCEPTION << "Invalid data dimensions";
    }

    cldnn::layout networkInputLayout(DataTypeFromPrecision(ip),
                                     inputFormat,
                                     dataTensor);

    // look at the expected color format of this input
    auto inputName = layer_type_name_ID(op);
    auto preProcess = inputInfo->getPreProcess();
    size_t meanChannels = preProcess.getNumberOfChannels();
    networkInputLayout.format = inputFormat;
    networkInputLayout.size = networkInputLayout.size.transform(inputFormat, 1);
    // The reorder output uses the Parameter's element type, which may differ from the
    // precision of the user-facing input (ip).
    networkInputLayout.data_type = DataTypeFromPrecision(op->get_output_element_type(0));
    auto preprocessPrimID = "reorder:" + inputName + Program::m_preProcessTag;
    cldnn::primitive_id meanBlobID = inputName + Program::m_meanValuesTag;
    std::vector<float> meanValues;

    if ((meanChannels > 0) &&
        (meanChannels != networkInputLayout.size.feature[0])) {
        THROW_IE_EXCEPTION << "Mismatched mean values channels in input " << inputName;
    }

    switch (preProcess.getMeanVariant()) {
    case NONE:
    case MEAN_VALUE: {
        if (meanChannels > 0) {
            for (size_t c = 0; c < meanChannels; c++) {
                if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
                    THROW_IE_EXCEPTION << "not supporting stdScale yet in input " << inputName;
                meanValues.push_back(preProcess[c]->meanValue);
            }
        }
        break;
    }
    case MEAN_IMAGE: {
        IE_ASSERT(meanChannels);
        // first merge all mean values to a single blob
        // todo make sure mean blob precision is the same as the input precision
        auto meanDims = inputDims;
        // overwrite batches with 1
        switch (meanDims.size()) {
        case 4: meanDims[0] = 1;
            break;
        default:
            THROW_IE_EXCEPTION << "Missing batch dimensions in input image";
        }
        const TensorDesc desc(Precision::FP32, meanDims, TensorDesc::getLayoutByDims(meanDims));
        TBlob<float> meanBlob(desc);
        meanBlob.allocate();
        auto meanBlobData = meanBlob.data();
        for (size_t c = 0; c < meanChannels; c++) {
            if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10)
                THROW_IE_EXCEPTION << "not supporting stdScale yet in input " << inputName;
            auto channelMeanBlob = std::dynamic_pointer_cast<TBlob<float>>(preProcess[c]->meanData);
            // The cast yields nullptr when meanData is empty or is not a TBlob<float>;
            // report that instead of dereferencing a null pointer.
            if (!channelMeanBlob)
                THROW_IE_EXCEPTION << "Missing or non-FP32 mean image data for channel " << c << " in input " << inputName;
            auto channelSize = channelMeanBlob->size();
            // Never write past the merged blob allocated above.
            if ((c + 1) * channelSize > meanBlob.size())
                THROW_IE_EXCEPTION << "Mean image size mismatch for channel " << c << " in input " << inputName;
            auto channelBlobData = channelMeanBlob->data();
            for (size_t i = 0; i < channelSize; i++) {
                meanBlobData[(c * channelSize) + i] = channelBlobData[i];
            }
        }
        // then create a data primitive for the mean values
        auto meanBlobPtr = std::make_shared<TBlob<float>>(meanBlob);

        // mean values will use external format (sub in the input format before convert to new format)
        cldnn::tensor meanBlobTensor(networkInputLayout.size);
        meanBlobTensor.batch[0] = 1;  // mean values have no batches
        cldnn::layout meanBlobLayout(cldnn::data_types::f32, cldnn::format::bfyx, meanBlobTensor);

        auto data = static_cast<const char *>(meanBlobPtr->buffer());

        // NOTE(review): `data` points into meanBlobPtr, a local that goes away when this
        // function returns, yet the pointer is kept as a key in p.blobMemCache. Confirm the
        // cache cannot later be hit by an unrelated buffer that happens to reuse the address.
        auto bufIter = p.blobMemCache.find(data);
        if (bufIter != p.blobMemCache.end()) {
            meanBlobID = bufIter->second;
        } else {
            auto mem = cldnn::memory::allocate(p.GetEngine(), meanBlobLayout, 0, false);
            auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
            auto buf = tmpPointer.data();
            auto bufSize = meanBlobLayout.bytes_count();

            std::memcpy(&buf[0], &data[0], bufSize);

            p.AddPrimitive(cldnn::data(meanBlobID, mem));
            p.blobMemCache[data] = meanBlobID;
        }
        break;
    }
    default: THROW_IE_EXCEPTION << "Invalid mean variant in input " << inputName;
        break;
    }

    if (ColorFormat::NV12 == preProcess.getColorFormat() && p.GetConfig().nv12_two_inputs) {
        // for NV12, create two input layouts with reorder instead of one,
        // and then would expect compound blob in inferRequest
        //
        // Reject anything that is not NCHW with I8/U8 precision. The previous condition
        // `NCHW != l && (I8 != ip || U8 != ip)` had an always-true precision part, so the
        // precision was never actually validated.
        if (Layout::NCHW != l ||
            (Precision::I8 != ip && Precision::U8 != ip)) {
            THROW_IE_EXCEPTION << "Unsupported layout (" << l << ") or precision ("
                               << ip.name() << ") for NV12 input " + inputInfo->name();
        }
        int height = inputDims[2];
        int width = inputDims[3];

        std::string y_name = inputName + "_Y";
        std::string uv_name = inputName + "_UV";

        // Y plane: one feature at full resolution; UV plane: two features at half resolution.
        cldnn::layout y_layout(DataTypeFromPrecision(ip),
                               cldnn::format::nv12, { 1, 1, width, height });
        cldnn::layout uv_layout(DataTypeFromPrecision(ip),
                                cldnn::format::nv12, { 1, 2, width / 2, height / 2 });
        auto inputY = cldnn::input_layout(y_name, y_layout);
        auto inputUV = cldnn::input_layout(uv_name, uv_layout);

        p.AddPrimitive(inputY);
        p.inputLayouts.insert({ inputInfo->name() + "_Y", y_layout });
        p.AddPrimitive(inputUV);
        p.inputLayouts.insert({ inputInfo->name() + "_UV", uv_layout });
        switch (preProcess.getMeanVariant()) {
        case NONE:
        case MEAN_VALUE: {
            p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanValues));
            break;
        }
        case MEAN_IMAGE: {
            p.AddPrimitive(cldnn::reorder(preprocessPrimID, y_name, uv_name, networkInputLayout, meanBlobID));
            break;
        }
        default: THROW_IE_EXCEPTION << "Invalid mean variant in input " + inputName;
            break;
        }

        p.primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() };
        p.primitivesToIRLayersMap[y_name] = { inputInfo->name() };
        p.primitivesToIRLayersMap[uv_name] = { inputInfo->name() };
        p.profilingIDs.push_back(preprocessPrimID);
        p.InitProfileInfo(preprocessPrimID, "Reorder");
    } else {
        // The input_layout keeps the user-facing precision; the reorder converts it to
        // networkInputLayout.data_type.
        cldnn::layout inputLayout(networkInputLayout);
        inputLayout.data_type = DataTypeFromPrecision(ip);
        p.inputLayouts.insert({ inputInfo->name(), inputLayout });

        p.AddPrimitive(cldnn::input_layout(inputName, inputLayout));
        p.primitivesToIRLayersMap[inputName] = { inputInfo->name() };

        switch (preProcess.getMeanVariant()) {
        case NONE:
        case MEAN_VALUE: {
            p.AddPrimitive(cldnn::reorder(preprocessPrimID, inputName, networkInputLayout, meanValues));
            break;
        }
        case MEAN_IMAGE: {
            p.AddPrimitive(cldnn::reorder(preprocessPrimID,
                                          inputName,
                                          networkInputLayout,
                                          meanBlobID));
            break;
        }
        default: THROW_IE_EXCEPTION << "Invalid mean variant in input " << inputName;
            break;
        }
        p.InitProfileInfo(preprocessPrimID, "reorder");
        p.profilingIDs.push_back(preprocessPrimID);
    }

    // Consumers of this input are wired to the preprocessing reorder, not to the raw input.
    p.primitiveIDs[inputName] = preprocessPrimID;
    p.primitiveIDs[preprocessPrimID] = preprocessPrimID;
}
REGISTER_FACTORY_IMPL(v0, Parameter);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,101 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "api/pooling.hpp"
namespace CLDNNPlugin {
// Pooling geometry expressed as clDNN tensors; produced by GetPoolingParameters() and
// consumed by the AvgPool/MaxPool translators in this file.
// Field order matters: GetPoolingParameters() returns `{k, s, pb, pe}` as an aggregate.
struct PoolingParameters {
// Pooling window size.
cldnn::tensor kernel;
// Window stride.
cldnn::tensor stride;
// Built from the negated pads_begin values.
cldnn::tensor pad_begin;
// Built from the negated pads_end values.
cldnn::tensor pad_end;
};
// Converts ngraph pooling attributes (given in ngraph spatial order) into clDNN tensors.
// Spatial entries are reversed and missing spatial dimensions are filled with 1 for
// kernel/stride and 0 for pads. Pad values are negated.
// Throws when the four inputs differ in length or describe something other than 1-3
// spatial dimensions.
static PoolingParameters GetPoolingParameters(const ngraph::Shape& kernel,
                                              const ngraph::Strides& strides,
                                              const ngraph::Shape& pads_begin,
                                              const ngraph::Shape& pads_end) {
    const size_t spatialRank = strides.size();
    const bool sameLength = pads_begin.size() == spatialRank &&
                            pads_end.size() == spatialRank &&
                            kernel.size() == spatialRank;
    if (!sameLength)
        THROW_IE_EXCEPTION << "Strides, KernelSizes and Pads are supposed to have the same elements count";

    // Convert to the signed tensor value type first so that negation is well defined.
    const std::vector<cldnn::tensor::value_type> padLo(pads_begin.begin(), pads_begin.end());
    const std::vector<cldnn::tensor::value_type> padHi(pads_end.begin(), pads_end.end());

    PoolingParameters result;
    if (spatialRank == 3) {
        result.kernel = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(kernel[2], kernel[1], kernel[0]));
        result.stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[2], strides[1], strides[0]));
        result.pad_begin = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-padLo[2], -padLo[1], -padLo[0]));
        result.pad_end = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-padHi[2], -padHi[1], -padHi[0]));
    } else if (spatialRank == 2) {
        result.kernel = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(kernel[1], kernel[0], 1));
        result.stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[1], strides[0], 1));
        result.pad_begin = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-padLo[1], -padLo[0], 0));
        result.pad_end = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-padHi[1], -padHi[0], 0));
    } else if (spatialRank == 1) {
        result.kernel = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(kernel[0], 1, 1));
        result.stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(strides[0], 1, 1));
        result.pad_begin = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-padLo[0], 0, 0));
        result.pad_end = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), cldnn::spatial(-padHi[0], 0, 0));
    } else {
        THROW_IE_EXCEPTION << "Unsupported pooling parameters size. Only 1d, 2d, and 3d cases are supported";
    }

    return result;
}
// Translates an ngraph AvgPool (v1) node into a cldnn::pooling primitive.
// exclude_pad selects average_no_padding; otherwise plain average is used.
void CreateAvgPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::AvgPool>& op) {
    p.ValidateInputs(op, {1});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    const auto geometry = GetPoolingParameters(op->get_kernel(), op->get_strides(), op->get_pads_begin(), op->get_pads_end());

    cldnn::pooling_mode mode = cldnn::pooling_mode::average;
    if (op->get_exclude_pad()) {
        mode = cldnn::pooling_mode::average_no_padding;
    }

    auto avgPrim = cldnn::pooling(primName,
                                  inputIds[0],
                                  mode,
                                  geometry.kernel,
                                  geometry.stride,
                                  geometry.pad_begin,
                                  CldnnTensorFromIEDims(op->get_output_shape(0)),
                                  DataTypeFromPrecision(op->get_output_element_type(0)));
    // The end padding is set on the primitive after construction.
    avgPrim.pad_end = geometry.pad_end;

    p.AddPrimitive(avgPrim);
    p.AddPrimitiveToProfiler(op);
}
// Translates an ngraph MaxPool (v1) node into a cldnn::pooling primitive in max mode.
void CreateMaxPoolOp(Program& p, const std::shared_ptr<ngraph::op::v1::MaxPool>& op) {
    p.ValidateInputs(op, {1});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    const auto geometry = GetPoolingParameters(op->get_kernel(), op->get_strides(), op->get_pads_begin(), op->get_pads_end());

    const auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));
    const auto outType = DataTypeFromPrecision(op->get_output_element_type(0));

    auto maxPrim = cldnn::pooling(primName,
                                  inputIds[0],
                                  cldnn::pooling_mode::max,
                                  geometry.kernel,
                                  geometry.stride,
                                  geometry.pad_begin,
                                  outTensor,
                                  outType);
    // The end padding is set on the primitive after construction.
    maxPrim.pad_end = geometry.pad_end;

    p.AddPrimitive(maxPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, MaxPool);
REGISTER_FACTORY_IMPL(v1, AvgPool);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,115 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/prior_box.hpp"
#include "ngraph/op/prior_box_clustered.hpp"
#include "api/prior_box.hpp"
namespace CLDNNPlugin {
// Translates an ngraph PriorBoxClustered (v0) node into a cldnn::prior_box primitive.
// The image size is taken from the last two entries of input 1's shape. When both steps
// are zero they are derived from the ratio between the image size and the last two
// entries of input 0's shape.
void CreatePriorBoxClusteredOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBoxClustered>& op) {
    p.ValidateInputs(op, {2});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto attrs = op->get_attrs();
    std::vector<float> width = attrs.widths;
    std::vector<float> height = attrs.heights;
    std::vector<float> variance = attrs.variances;
    float offset = attrs.offset;
    bool clip = attrs.clip;

    auto inp_dims = op->get_input_shape(0);
    auto img_dims = op->get_input_shape(1);

    int img_w = static_cast<int>(img_dims.back());
    int img_h = static_cast<int>(img_dims.at(img_dims.size() - 2));
    cldnn::tensor img_size = (cldnn::tensor) cldnn::spatial(TensorValue(img_w), TensorValue(img_h));

    auto step_w = attrs.step_widths;
    auto step_h = attrs.step_heights;
    // Nearly equal steps are snapped to the width step.
    if (std::abs(attrs.step_heights - attrs.step_widths) < 1e-5) {
        step_h = step_w;
    }

    if (step_w == 0.0f && step_h == 0.0f) {
        step_w = static_cast<float>(img_w) / inp_dims.back();
        // Index inp_dims with its own rank. The original used img_dims.size() here, which
        // picks the wrong entry (or throws from at()) whenever the two ranks differ.
        step_h = static_cast<float>(img_h) / inp_dims.at(inp_dims.size() - 2);
    }

    auto priorBoxPrim = cldnn::prior_box(layerName,
                                         inputPrimitives[0],
                                         img_size,
                                         clip,
                                         variance,
                                         step_w,
                                         step_h,
                                         offset,
                                         width,
                                         height,
                                         DataTypeFromPrecision(op->get_output_element_type(0)));

    p.AddPrimitive(priorBoxPrim);
    p.AddPrimitiveToProfiler(op);
}
// Translates an ngraph PriorBox (v0) node into a cldnn::prior_box primitive.
// The image size is read from the last two entries of input 1's shape; the single
// `step` attribute is used for both the horizontal and the vertical step.
void CreatePriorBoxOp(Program& p, const std::shared_ptr<ngraph::op::v0::PriorBox>& op) {
    p.ValidateInputs(op, {2});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    const auto attrs = op->get_attrs();

    // params
    const std::vector<float>& min_size = attrs.min_size;
    const std::vector<float>& max_size = attrs.max_size;
    const std::vector<float>& aspect_ratio = attrs.aspect_ratio;
    const std::vector<float>& variance = attrs.variance;
    const std::vector<float>& fixed_size = attrs.fixed_size;
    const std::vector<float>& fixed_ratio = attrs.fixed_ratio;
    const std::vector<float>& density = attrs.density;
    const bool flip = attrs.flip;
    const bool clip = attrs.clip;
    const bool scale_all_sizes = attrs.scale_all_sizes;
    const float offset = attrs.offset;

    const auto step_w = attrs.step;
    const auto step_h = attrs.step;

    const auto imageShape = op->get_input_shape(1);
    const auto imageW = imageShape.back();
    const auto imageH = imageShape.at(imageShape.size() - 2);
    const cldnn::tensor img_size = static_cast<cldnn::tensor>(cldnn::spatial(TensorValue(imageW), TensorValue(imageH)));

    p.AddPrimitive(cldnn::prior_box(primName,
                                    inputIds[0],
                                    img_size,
                                    min_size,
                                    max_size,
                                    aspect_ratio,
                                    flip,
                                    clip,
                                    variance,
                                    step_w,
                                    step_h,
                                    offset,
                                    scale_all_sizes,
                                    fixed_ratio,
                                    fixed_size,
                                    density));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, PriorBoxClustered);
REGISTER_FACTORY_IMPL(v0, PriorBox);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,146 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/proposal.hpp"
#include "api/proposal.hpp"
#include "api/mutable_data.hpp"
namespace CLDNNPlugin {
// Translates an ngraph Proposal node into a cldnn::proposal primitive.
// With two outputs, the second output is routed through a shared mutable_data buffer:
// a "_md_write" primitive is passed to the proposal as an extra input and a ".1"
// primitive that depends on the proposal exposes the same memory for reading.
void CreateProposalOp(Program& p, const std::shared_ptr<ngraph::op::v0::Proposal>& op) {
    p.ValidateInputs(op, {3});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);

    const auto attrs = op->get_attrs();
    const float nms_thresh = attrs.nms_thresh;
    const int min_size = attrs.min_size;
    const int feature_stride = attrs.feat_stride;
    const int pre_nms_topn = attrs.pre_nms_topn;
    const int post_nms_topn = attrs.post_nms_topn;
    const std::vector<float> ratio = attrs.ratio;
    const std::vector<float> scale = attrs.scale;
    const float box_coordinate_scale = attrs.box_coordinate_scale;
    const float box_size_scale = attrs.box_size_scale;
    const int base_size = attrs.base_size;
    const bool normalize = attrs.normalize;
    const bool clip_before_nms = attrs.clip_before_nms;
    const bool clip_after_nms = attrs.clip_after_nms;

    // The "tensorflow" framework value flips a fixed set of switches; every other
    // framework value gets the opposite set.
    const bool tfStyle = (attrs.framework == "tensorflow");
    const float coordinates_offset = tfStyle ? 0.0f : 1.0f;
    const bool initial_clip = tfStyle;
    const bool shift_anchors = tfStyle;
    const bool round_ratios = !tfStyle;
    const bool swap_xy = tfStyle;

    const bool hasSecondOutput = (op->get_output_size() == 2);
    if (!hasSecondOutput) {
        const std::string proposalLayerName = layer_type_name_ID(op);
        p.AddPrimitive(cldnn::proposal(proposalLayerName,
                                       inputPrimitives[0],  // cls_score
                                       inputPrimitives[1],  // bbox_pred
                                       inputPrimitives[2],  // im_info
                                       0,                   // max_num_proposals is unused
                                       nms_thresh,
                                       base_size,
                                       min_size,
                                       feature_stride,
                                       pre_nms_topn,
                                       post_nms_topn,
                                       ratio,
                                       scale,
                                       coordinates_offset,
                                       box_coordinate_scale,
                                       box_size_scale,
                                       false,
                                       swap_xy,
                                       initial_clip,
                                       clip_before_nms,
                                       clip_after_nms,
                                       round_ratios,
                                       shift_anchors,
                                       normalize));
        p.AddPrimitiveToProfiler(op);
        return;
    }

    // An i64 second output is stored as i32.
    auto secondOutType = op->get_output_element_type(1);
    if (secondOutType == ngraph::element::i64) {
        secondOutType = ngraph::element::i32;
    }

    const cldnn::layout secondOutLayout(DataTypeFromPrecision(secondOutType),
                                        DefaultFormatForDims(op->get_output_shape(1).size()),
                                        CldnnTensorFromIEDims(op->get_output_shape(1)));
    auto sharedMem = cldnn::memory::allocate(p.GetEngine(), secondOutLayout);

    // Write side of the shared buffer: handed to the proposal primitive as a fourth input.
    const cldnn::primitive_id writeId = layer_type_name_ID(op) + "_md_write";
    auto writePrim = cldnn::mutable_data(writeId, sharedMem);
    p.primitivesToIRLayersMap[writeId] = { op->get_friendly_name() };
    p.primitiveIDs[writeId] = writeId;
    p.AddPrimitive(writePrim);
    inputPrimitives.push_back(writeId);

    const std::string proposalLayerName = layer_type_name_ID(op) + ".0";
    p.AddPrimitive(cldnn::proposal(proposalLayerName,
                                   inputPrimitives[0],  // cls_score
                                   inputPrimitives[1],  // bbox_pred
                                   inputPrimitives[2],  // im_info
                                   inputPrimitives[3],  // second_output
                                   0,                   // max_num_proposals is unused
                                   nms_thresh,
                                   base_size,
                                   min_size,
                                   feature_stride,
                                   pre_nms_topn,
                                   post_nms_topn,
                                   ratio,
                                   scale,
                                   coordinates_offset,
                                   box_coordinate_scale,
                                   box_size_scale,
                                   false,
                                   swap_xy,
                                   initial_clip,
                                   clip_before_nms,
                                   clip_after_nms,
                                   round_ratios,
                                   shift_anchors,
                                   normalize));

    // Read side of the shared buffer: depends on the proposal primitive.
    const cldnn::primitive_id readId = layer_type_name_ID(op) + ".1";
    auto readPrim = cldnn::mutable_data(readId, { proposalLayerName }, sharedMem);
    p.primitivesToIRLayersMap[readId] = { op->get_friendly_name() };
    p.primitiveIDs[readId] = readId;
    p.AddPrimitive(readPrim);

    p.AddPrimitiveToProfiler(proposalLayerName, op);
}
REGISTER_FACTORY_IMPL(v0, Proposal);
REGISTER_FACTORY_IMPL(v4, Proposal);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,146 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/reduce_sum.hpp"
#include "ngraph/op/reduce_prod.hpp"
#include "ngraph/op/reduce_mean.hpp"
#include "ngraph/op/reduce_logical_or.hpp"
#include "ngraph/op/reduce_logical_and.hpp"
#include "ngraph/op/reduce_l1.hpp"
#include "ngraph/op/reduce_l2.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/constant.hpp"
#include "api/reduce.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Shared translator for all Reduce* ops: builds a cldnn::reduce primitive and, when
// dimensions are dropped from a 5D/6D input, a following reorder to the format that
// matches the output rank.
//   p         - program being built
//   op        - the reduce node; input 1 (axes) must be a Constant
//   mode      - clDNN reduction mode
//   keep_dims - whether reduced dimensions are kept
// Throws when the axes input is not constant or an axis is outside [-rank, rank - 1].
void CreateReduceOp(Program& p, const std::shared_ptr<ngraph::Node>& op, cldnn::reduce_mode mode, bool keep_dims) {
    p.ValidateInputs(op, {2});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    size_t rank = op->get_input_shape(0).size();

    auto axes_constant = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
    if (!axes_constant) {
        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }
    std::vector<int32_t> rawAxes = axes_constant->cast_vector<int32_t>();

    std::vector<uint16_t> axes;
    for (size_t a = 0; a < rawAxes.size(); a++) {
        // Negative axes count from the end.
        if (rawAxes[a] < 0)
            rawAxes[a] += static_cast<int32_t>(rank);
        // Compared as `>= rank` so that rank == 0 cannot underflow `rank - 1`.
        if (rawAxes[a] < 0 || static_cast<size_t>(rawAxes[a]) >= rank)
            THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect Reduce axis value: " << rawAxes[a];
        if (rank == 6) {
            switch (rawAxes[a]) {
                case 0: axes.push_back(cldnn::reduce::along_b); break;
                case 1: axes.push_back(cldnn::reduce::along_f); break;
                case 2: axes.push_back(cldnn::reduce::along_w); break;
                case 3: axes.push_back(cldnn::reduce::along_z); break;
                case 4: axes.push_back(cldnn::reduce::along_y); break;
                case 5: axes.push_back(cldnn::reduce::along_x); break;
            }
        } else if (rank == 5) {
            switch (rawAxes[a]) {
                case 0: axes.push_back(cldnn::reduce::along_b); break;
                case 1: axes.push_back(cldnn::reduce::along_f); break;
                case 2: axes.push_back(cldnn::reduce::along_z); break;
                case 3: axes.push_back(cldnn::reduce::along_y); break;
                case 4: axes.push_back(cldnn::reduce::along_x); break;
            }
        } else {
            switch (rawAxes[a]) {
                case 0: axes.push_back(cldnn::reduce::along_b); break;
                case 1: axes.push_back(cldnn::reduce::along_f); break;
                case 2: axes.push_back(cldnn::reduce::along_y); break;
                case 3: axes.push_back(cldnn::reduce::along_x); break;
            }
        }
    }

    // The same axis may be listed more than once (e.g. as 1 and as 1 - rank); keep one copy.
    std::sort(axes.begin(), axes.end());
    axes.erase(std::unique(axes.begin(), axes.end()), axes.end());

    auto reducePrim = cldnn::reduce(layerName,
                                    inputPrimitives[0],
                                    mode,
                                    axes,
                                    static_cast<int32_t>(keep_dims));

    p.AddPrimitive(reducePrim);

    auto reorderLayerName = layerName + "_reorder";
    cldnn::format out_format = cldnn::format::any;
    auto out_dt = DataTypeFromPrecision(op->get_output_element_type(0));
    if (!keep_dims && rank > 4) {
        // Use the de-duplicated axes: counting rawAxes would subtract repeated axes more
        // than once and could pick the wrong format or underflow.
        const size_t out_rank = rank - axes.size();
        if (out_rank == 6)
            out_format = cldnn::format::bfwzyx;
        else if (out_rank == 5)
            out_format = cldnn::format::bfzyx;
        else if (out_rank <= 4)
            out_format = cldnn::format::bfyx;

        auto reorder_prim = cldnn::reorder(reorderLayerName, layerName, out_format, out_dt);
        p.AddPrimitive(reorder_prim);
        p.AddPrimitiveToProfiler(op, reorderLayerName);
    } else {
        p.AddPrimitiveToProfiler(op);
    }
}
// Thin per-op entry points: each one picks the clDNN reduce mode for its ngraph op and
// forwards to CreateReduceOp together with the op's keep_dims flag.

// ReduceMax (v1) -> reduce_mode::max
void CreateReduceMaxOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceMax>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::max, keepDims);
}

// ReduceLogicalAnd (v1) -> reduce_mode::logical_and
void CreateReduceLogicalAndOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceLogicalAnd>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::logical_and, keepDims);
}

// ReduceLogicalOr (v1) -> reduce_mode::logical_or
void CreateReduceLogicalOrOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceLogicalOr>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::logical_or, keepDims);
}

// ReduceMean (v1) -> reduce_mode::mean
void CreateReduceMeanOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceMean>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::mean, keepDims);
}

// ReduceMin (v1) -> reduce_mode::min
void CreateReduceMinOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceMin>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::min, keepDims);
}

// ReduceProd (v1) -> reduce_mode::prod
void CreateReduceProdOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceProd>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::prod, keepDims);
}

// ReduceSum (v1) -> reduce_mode::sum
void CreateReduceSumOp(Program& p, const std::shared_ptr<ngraph::op::v1::ReduceSum>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::sum, keepDims);
}

// ReduceL1 (v4) -> reduce_mode::l1
void CreateReduceL1Op(Program& p, const std::shared_ptr<ngraph::op::v4::ReduceL1>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::l1, keepDims);
}

// ReduceL2 (v4) -> reduce_mode::l2
void CreateReduceL2Op(Program& p, const std::shared_ptr<ngraph::op::v4::ReduceL2>& op) {
    const bool keepDims = op->get_keep_dims();
    CreateReduceOp(p, op, cldnn::reduce_mode::l2, keepDims);
}
REGISTER_FACTORY_IMPL(v1, ReduceMax);
REGISTER_FACTORY_IMPL(v1, ReduceLogicalAnd);
REGISTER_FACTORY_IMPL(v1, ReduceLogicalOr);
REGISTER_FACTORY_IMPL(v1, ReduceMean);
REGISTER_FACTORY_IMPL(v1, ReduceMin);
REGISTER_FACTORY_IMPL(v1, ReduceProd);
REGISTER_FACTORY_IMPL(v1, ReduceSum);
REGISTER_FACTORY_IMPL(v4, ReduceL1);
REGISTER_FACTORY_IMPL(v4, ReduceL2);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,39 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/region_yolo.hpp"
#include "api/region_yolo.hpp"
namespace CLDNNPlugin {
// Translates an ngraph RegionYolo (v0) node into a cldnn::region_yolo primitive.
// Only the number of mask entries is forwarded, not the mask values themselves.
void CreateRegionYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::RegionYolo>& op) {
    p.ValidateInputs(op, {1});
    const auto inputIds = p.GetInputPrimitiveIDs(op);
    const std::string primName = layer_type_name_ID(op);

    const uint32_t numCoords = op->get_num_coords();
    const uint32_t numClasses = op->get_num_classes();
    const uint32_t numRegions = op->get_num_regions();
    const uint32_t maskLength = op->get_mask().size();
    const bool applySoftmax = op->get_do_softmax();

    p.AddPrimitive(cldnn::region_yolo(primName,
                                      inputIds[0],
                                      numCoords,
                                      numClasses,
                                      numRegions,
                                      maskLength,
                                      applySoftmax));
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, RegionYolo);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/reorg_yolo.hpp"
#include "api/reorg_yolo.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v0::ReorgYolo node into a cldnn::reorg_yolo primitive.
// Only the first entry of the op's strides attribute is forwarded.
void CreateReorgYoloOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReorgYolo>& op) {
    p.ValidateInputs(op, {1});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    const uint32_t firstStride = op->get_strides()[0];

    p.AddPrimitive(cldnn::reorg_yolo(primName, inputs[0], firstStride));
    p.AddPrimitiveToProfiler(op);
}
// Factory registration for v0::ReorgYolo.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds the
// op type to CreateReorgYoloOp - confirm.
REGISTER_FACTORY_IMPL(v0, ReorgYolo);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,72 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/squeeze.hpp"
#include "ngraph/op/unsqueeze.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
namespace CLDNNPlugin {
// Shared lowering for Reshape / Squeeze / Unsqueeze.
// Emits a cldnn::reshape to the op's output shape. When the input and output ranks
// differ, a cldnn::reorder is inserted in front of the reshape so that the data is in
// the format matching the output rank (bfzyx for 5D, bfwzyx for 6D, bfyx otherwise).
void CreateCommonReshapeOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
    p.ValidateInputs(op, {1, 2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    auto srcShape = op->get_input_shape(0);
    auto dstShape = op->get_output_shape(0);
    auto dstTensor = CldnnTensorFromIEDims(dstShape);

    // By default the reshape reads the op's first input directly.
    cldnn::primitive_id reshapeSource = inputs[0];

    const bool rankChanges = srcShape.size() != dstShape.size();
    if (rankChanges) {
        // Pick the format that corresponds to the output rank.
        cldnn::format dstFormat = cldnn::format::bfyx;
        if (dstShape.size() == 5) {
            dstFormat = cldnn::format::bfzyx;
        } else if (dstShape.size() == 6) {
            dstFormat = cldnn::format::bfwzyx;
        }

        cldnn::primitive_id reorderName = "reorder:" + op->get_friendly_name() + "_reorder";
        cldnn::layout dstLayout(DataTypeFromPrecision(op->get_output_element_type(0)), dstFormat, dstTensor);
        p.AddPrimitive(cldnn::reorder(reorderName, reshapeSource, dstLayout));

        // Bookkeeping for the helper reorder: profiling entry, IR-layer mapping and ID lookup.
        p.InitProfileInfo(reorderName, "Reorder", false, InferenceEngine::InferenceEngineProfileInfo::EXECUTED, primName);
        p.primitivesToIRLayersMap[reorderName] = { op->get_friendly_name() };
        p.primitiveIDs[primName + "_reorder"] = reorderName;
        p.primitiveIDs[reorderName] = reorderName;
        p.profilingIDs.push_back(reorderName);

        reshapeSource = reorderName;
    }

    p.AddPrimitive(cldnn::reshape(primName, reshapeSource, dstTensor));
    p.AddPrimitiveToProfiler(op);
}
// Lowers v1::Reshape through the shared reshape path; the typed pointer is
// converted to a generic ngraph::Node pointer before delegation.
void CreateReshapeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Reshape>& op) {
    const std::shared_ptr<ngraph::Node> node = op;
    CreateCommonReshapeOp(p, node);
}
// Lowers v0::Squeeze through the shared reshape path; the typed pointer is
// converted to a generic ngraph::Node pointer before delegation.
void CreateSqueezeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Squeeze>& op) {
    const std::shared_ptr<ngraph::Node> node = op;
    CreateCommonReshapeOp(p, node);
}
// Lowers v0::Unsqueeze through the shared reshape path; the typed pointer is
// converted to a generic ngraph::Node pointer before delegation.
void CreateUnsqueezeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Unsqueeze>& op) {
    const std::shared_ptr<ngraph::Node> node = op;
    CreateCommonReshapeOp(p, node);
}
// Factory registrations for the three ops that share CreateCommonReshapeOp.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds each
// op type to its Create<Name>Op function - confirm.
REGISTER_FACTORY_IMPL(v1, Reshape);
REGISTER_FACTORY_IMPL(v0, Squeeze);
REGISTER_FACTORY_IMPL(v0, Unsqueeze);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,71 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/result.hpp"
#include "api/reorder.hpp"
using namespace InferenceEngine;
namespace CLDNNPlugin {
// Lowers an nGraph v0::Result node into the final output reorder.
// The network output entry is looked up by the producer's tensor name, falling back to
// "<producer friendly name>[.<output index>]" when the tensor is unnamed. The output's
// layout is validated, then a reorder to the requested layout/precision is added and the
// program's output bookkeeping maps are updated.
// Throws when the output cannot be found or its layout is not supported.
void CreateResultOp(Program& p, const std::shared_ptr<ngraph::op::v0::Result>& op) {
    OutputsDataMap networkOutputs = p.GetNetworkOutputs();
    p.ValidateInputs(op, {1});

    // Work out the key under which this output is stored in the outputs map.
    auto sourceOutput = op->get_input_source_output(0);
    std::string outputKey = sourceOutput.get_tensor().get_name();
    if (outputKey.empty()) {
        auto producer = op->get_input_node_shared_ptr(0);
        outputKey = producer->get_friendly_name();
        if (producer->get_output_size() > 1) {
            outputKey += "." + std::to_string(sourceOutput.get_index());
        }
    }

    auto found = networkOutputs.find(outputKey);
    if (found == networkOutputs.end()) {
        THROW_IE_EXCEPTION << "Can't find output " << outputKey << " in OutputsDataMap";
    }
    std::string originalOutName = found->first;
    DataPtr outputData = found->second;

    auto inputs = p.GetInputPrimitiveIDs(op);
    const auto outputDesc = outputData->getTensorDesc();
    const auto outputlayout = outputDesc.getLayout();

    // TODO: add precision check once there's an outputInfo object
    switch (outputlayout) {
    case NCHW:
    // TODO: change 6d case once new layout added in IE
    case BLOCKED:
    case NCDHW:
    case NHWC:
    case CHW:
    case NC:
    case C:
    case SCALAR:
        break;
    default:
        THROW_IE_EXCEPTION << "Unsupported layout (" << outputlayout << ") in output: " << originalOutName;
    }

    auto outLayerName = layer_type_name_ID(op);
    Precision precision = outputData->getPrecision();
    std::string sourceId = inputs[0];

    p.AddPrimitive(cldnn::reorder(outLayerName,
                                  sourceId,
                                  FormatFromLayout(outputData->getLayout()),
                                  DataTypeFromPrecision(precision)));

    // Register the reorder for profiling and make it resolvable by both its own ID
    // and the original network output name.
    p.InitProfileInfo(outLayerName, "reorder");
    p.profilingIDs.push_back(outLayerName);
    p.primitiveIDs[outLayerName] = outLayerName;
    p.primitiveIDs[originalOutName] = outLayerName;
    p.outputDims[originalOutName] = outputDesc.getDims();
    p.prevPrimitiveIDs[outLayerName] = {originalOutName};
}
// Factory registration for v0::Result.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds the
// op type to CreateResultOp - confirm.
REGISTER_FACTORY_IMPL(v0, Result);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,33 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/reverse_sequence.hpp"
#include "api/reverse_sequence.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v0::ReverseSequence node (two inputs) into a
// cldnn::reverse_sequence primitive. Note the argument order expected by the
// primitive: sequence axis first, batch axis second.
void CreateReverseSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v0::ReverseSequence>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    const size_t batchAxis = op->get_batch_axis();
    const size_t sequenceAxis = op->get_sequence_axis();

    p.AddPrimitive(cldnn::reverse_sequence(primName,
                                           inputs[0],
                                           inputs[1],
                                           sequenceAxis,
                                           batchAxis));
    p.AddPrimitiveToProfiler(op);
}
// Factory registration for v0::ReverseSequence.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds the
// op type to CreateReverseSequenceOp - confirm.
REGISTER_FACTORY_IMPL(v0, ReverseSequence);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,315 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/lstm_cell.hpp"
#include "ngraph/op/lstm_sequence.hpp"
#include "api/reshape.hpp"
#include "api/reorder.hpp"
#include "api/fully_connected.hpp"
#include "api/lstm.hpp"
#include "api/crop.hpp"
#include "api/concatenation.hpp"
namespace CLDNNPlugin {
// Maps an activation name used by nGraph recurrent ops to the clDNN activation enum.
// Recognised names: "sigmoid", "tanh", "relu". Any other name yields
// cldnn::activation_func::none, which callers treat as "unsupported".
cldnn::activation_func GetActivationFunc(std::string name) {
    if (name == "sigmoid") {
        return cldnn::activation_func::logistic;
    }
    if (name == "tanh") {
        return cldnn::activation_func::hyperbolic_tan;
    }
    if (name == "relu") {
        return cldnn::activation_func::relu;
    }
    return cldnn::activation_func::none;
}
// Fills `activations` and `activation_params` from the activation attributes of an LSTM-like op.
//
// `activations` starts from the default triple {logistic, hyperbolic_tan, hyperbolic_tan} and is
// overwritten when the op lists its own activations (exactly three are required).
// `activation_params` is emptied and, when the op provides alpha values, refilled with three
// {alpha, beta} pairs (both lists must then have exactly three entries).
// Throws on a wrong attribute count or an activation name GetActivationFunc does not recognise.
template <typename T>
void GetLSTMActivationParams(const std::shared_ptr<T>& op,
                             std::vector<cldnn::activation_func>& activations,
                             std::vector<cldnn::activation_additional_params>& activation_params) {
    activations = { cldnn::activation_func::logistic,
                    cldnn::activation_func::hyperbolic_tan,
                    cldnn::activation_func::hyperbolic_tan };
    activation_params.clear();

    auto names = op->get_activations();
    if (!names.empty()) {
        if (names.size() != 3)
            THROW_IE_EXCEPTION << "Wrong number of activations for LSTMCell op " << op->get_friendly_name();

        size_t slot = 0;
        for (const auto& activationName : names) {
            const auto func = GetActivationFunc(activationName);
            if (func == cldnn::activation_func::none)
                THROW_IE_EXCEPTION << "Wrong or unsupported activation type " << activationName
                                   << " for LSTMCell op " << op->get_friendly_name();
            activations[slot] = func;
            ++slot;
        }
    }

    auto alphas = op->get_activations_alpha();
    auto betas = op->get_activations_beta();
    if (alphas.empty()) {
        return;
    }

    if (alphas.size() != 3 || betas.size() != 3)
        THROW_IE_EXCEPTION << "Wrong number of activation parameters for LSTMCell op " << op->get_friendly_name();

    for (size_t idx = 0; idx < 3; ++idx) {
        cldnn::activation_additional_params extra = { alphas[idx], betas[idx] };
        activation_params.push_back(extra);
    }
}
// Lowers an nGraph v4::LSTMCell node into a small graph of clDNN primitives:
//   reshape/reorder of the data, hidden-state and cell-state inputs ->
//   concatenation of data and hidden state -> fully_connected against the concatenated
//   weights/recurrent weights (+ bias) -> lstm_elt -> two crops producing the outputs.
// Input indices as used below: 0 = data, 1 = hidden state, 2 = cell state,
// 3 = weights, 4 = recurrent weights, 5 = bias.
// The outputs are registered as "<layerName>.0" (hidden state) and "<layerName>.1" (cell state).
// Throws if any of the first three inputs is not 2D.
void CreateLSTMCellOp(Program& p, const std::shared_ptr<ngraph::op::v4::LSTMCell>& op) {
p.ValidateInputs(op, {6});
int lstm_batch_size, lstm_input_size, lstm_hidden_size;
// Bias is always taken from input 5; the flag is never cleared in this function.
bool hasBias = true;
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
std::string layerName = layer_type_name_ID(op);
cldnn::primitive_id weightID = inputPrimitives[3];
cldnn::primitive_id recurrentID = inputPrimitives[4];
cldnn::primitive_id biasID = inputPrimitives[5];
/* check incoming shapes and derive batch / input / hidden sizes */
{
const auto in_dims0 = op->get_input_shape(0);
const auto out_dims0 = op->get_output_shape(0);
if (in_dims0.size() != 2 ||
op->get_input_shape(1).size() != 2 ||
op->get_input_shape(2).size() != 2)
THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell op " << op->get_friendly_name();
// Data input is [batch, input_size]; hidden size is the last dim of output 0.
lstm_input_size = in_dims0.back();
lstm_batch_size = in_dims0.at(in_dims0.size()-2);
lstm_hidden_size = out_dims0.back();
}
std::vector<cldnn::activation_func> activations;
std::vector<cldnn::activation_additional_params> activation_params;
GetLSTMActivationParams(op, activations, activation_params);
float clip = op->get_clip();
// All intermediate layouts below use the data type of the op's first output.
// NOTE(review): an earlier comment here said the LSTM primitive works in single precision
// for all tensors; the code does not force f32 - confirm which is intended.
auto lstm_dtype = DataTypeFromPrecision(op->get_output_element_type(0));
cldnn::primitive_id inReshapeID = layerName + "_inReshape";
cldnn::primitive_id permuteID = layerName + "_inputReorder";
cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder";
cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape";
cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder";
cldnn::primitive_id input_concatID = layerName + "_inputConcat";
// Data and state tensors are laid out as {batch, 1, size, 1} in bfyx.
cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
cldnn::layout hiddenLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inStateShape);
// Data input: reshape + reorder.
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
// Suffix "_1" is the hidden-state branch (input 1), "_2" the cell-state branch (input 2).
std::string hiddenInResh = inHiddenReshapeID + "_1";
std::string hiddenInStr = inHiddenReorderID + "_1";
std::string cellInResh = inHiddenReshapeID + "_2";
std::string cellInStr = inHiddenReorderID + "_2";
p.AddPrimitive(cldnn::reshape(hiddenInResh, inputPrimitives[1], inStateShape));
p.AddPrimitive(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout));
p.AddPrimitive(cldnn::reshape(cellInResh, inputPrimitives[2], inStateShape));
p.AddPrimitive(cldnn::reorder(cellInStr, cellInResh, hiddenLayout));
// Concatenate data and hidden state along x so one fully_connected handles both.
p.AddPrimitive(cldnn::concatenation(input_concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x));
p.AddInnerPrimitiveToProfiler(hiddenInResh, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(hiddenInStr, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(cellInResh, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(cellInStr, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(input_concatID, op->get_friendly_name(), op);
// The fully_connected result holds 4 * hidden_size values per batch entry.
cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
cldnn::layout gemmLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, gemmSz);
cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
// Crop offset of 1 in the second tensor component selects the cell-state slice of lstm_elt's output.
cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
std::string lstm_fc_id = layerName + "_fully_connected";
std::string lstm_elt_id = layerName + "_lstm_elt";
// NOTE(review): crop_id is not used anywhere below in this function.
std::string crop_id = layerName + "_crop";
cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
// Weights and recurrent weights are joined along f to match the concatenated input.
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_f));
p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, input_concatID, WRconcatID, hasBias ? biasID : ""));
p.AddPrimitive(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz));
p.AddPrimitive(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout));
// NOTE(review): the literal 0 after `clip` is presumably lstm_elt's input_forget flag - confirm.
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr,
clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(gemmReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(gemmReorderID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);
// Output 0: hidden state, cropped from offset {0, 0, 0, 0}.
cldnn::primitive_id outputHiddenID = layerName + ".0";
p.AddPrimitive(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
p.AddInnerPrimitiveToProfiler(outputHiddenID, op->get_friendly_name(), op);
// Output 1: cell state, cropped from offset cellCropSz.
cldnn::primitive_id outputCellID = layerName + ".1";
p.AddPrimitive(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
p.AddInnerPrimitiveToProfiler(outputCellID, op->get_friendly_name(), op);
// output primitive IDs
p.primitiveIDs[outputHiddenID] = outputHiddenID; // "<layerName>.0" - hidden state output
p.primitiveIDs[layerName] = outputHiddenID; // bare layer name also resolves to the hidden state crop
p.primitiveIDs[outputCellID] = outputCellID; // "<layerName>.1" - cell state output
p.AddPrimitiveToProfiler(layerName, op, outputHiddenID);
}
// Lowers an nGraph v5::LSTMSequence node by unrolling it over the sequence length.
// For every time step the corresponding slice of the input is cropped, concatenated with the
// running hidden state, passed through fully_connected (shared concatenated weights + bias) and
// lstm_elt, and the new hidden/cell states are cropped out for the next step.
// Input indices as used below: 0 = data, 1 = initial hidden state, 2 = initial cell state,
// 4 = weights, 5 = recurrent weights, 6 = bias (input 3 is not read here).
// Registered outputs: "<layerName>.0" = concatenation of all hidden states,
// "<layerName>.1" = last hidden state, "<layerName>.2" = last cell state.
// Throws if any of the first three inputs is not 3D.
void CreateLSTMSequenceOp(Program& p, const std::shared_ptr<ngraph::op::v5::LSTMSequence>& op) {
p.ValidateInputs(op, {7});
std::string layerName = layer_type_name_ID(op);
int lstm_batch_size, lstm_input_size, lstm_hidden_size, lstm_sequence_len;
auto inputPrimitives = p.GetInputPrimitiveIDs(op);
cldnn::primitive_id weightID = inputPrimitives[4];
cldnn::primitive_id recurrentID = inputPrimitives[5];
cldnn::primitive_id biasID = inputPrimitives[6];
// Validate ranks and derive sizes; data input is read as [batch, seq_len, input_size].
{
const auto in_dims0 = op->get_input_shape(0);
const auto out_dims0 = op->get_output_shape(0);
if (in_dims0.size() != 3 ||
op->get_input_shape(1).size() != 3 ||
op->get_input_shape(2).size() != 3)
THROW_IE_EXCEPTION << "Wrong input shapes for LSTMSequence op " << op->get_friendly_name();
lstm_input_size = in_dims0.back();
lstm_sequence_len = in_dims0.at(in_dims0.size() - 2);
lstm_batch_size = in_dims0.at(in_dims0.size() - 3);
lstm_hidden_size = out_dims0.back();
}
std::vector<cldnn::activation_func> activations;
std::vector<cldnn::activation_additional_params> activation_params;
GetLSTMActivationParams(op, activations, activation_params);
float clip = op->get_clip();
// Any direction other than FORWARD is processed back-to-front (see seqIdx below).
bool isForward = op->get_direction() == ngraph::op::RecurrentSequenceDirection::FORWARD;
// All intermediate layouts below use the data type of the op's first output.
// NOTE(review): an earlier comment here said the LSTM primitive works in single precision
// for all tensors; the code does not force f32 - confirm which is intended.
auto lstm_dtype = DataTypeFromPrecision(op->get_output_element_type(0));
cldnn::primitive_id inReshapeID = layerName + "_inReshape";
cldnn::primitive_id permuteID = layerName + "_inputReorder";
cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
// NOTE(review): inHiddenReorderID is not used anywhere below in this function.
cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder";
cldnn::primitive_id inHiddenStateID = inHiddenReshapeID + "_1";
cldnn::primitive_id inCellStateID = inHiddenReshapeID + "_2";
// Collects the per-step hidden state crops for the final concatenation.
std::vector<cldnn::primitive_id> output_ids_offsets;
// Whole sequence as {batch, seq_len, input_size, 1}; states as {batch, 1, hidden_size, 1}.
cldnn::tensor inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
cldnn::tensor inStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
cldnn::layout inputLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, inputShape);
p.AddPrimitive(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
p.AddPrimitive(cldnn::reorder(permuteID, inReshapeID, inputLayout));
p.AddPrimitive(cldnn::reshape(inHiddenStateID, inputPrimitives[1], inStateShape));
p.AddPrimitive(cldnn::reshape(inCellStateID, inputPrimitives[2], inStateShape));
p.AddInnerPrimitiveToProfiler(inReshapeID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(permuteID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(inHiddenStateID, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(inCellStateID, op->get_friendly_name(), op);
// The fully_connected result holds 4 * hidden_size values per batch entry.
cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 };
cldnn::layout gemmLayout = cldnn::layout(lstm_dtype, cldnn::format::bfyx, gemmSz);
cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
// Crop offset of 1 in the second tensor component selects the cell-state slice of lstm_elt's output.
cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
// Running state IDs; they start at the reshaped initial states (same IDs as
// inHiddenStateID / inCellStateID) and are re-pointed after every time step.
cldnn::primitive_id hiddenStr = inHiddenReshapeID + "_1";
cldnn::primitive_id cellStr = inHiddenReshapeID + "_2";
cldnn::primitive_id inputCropID = layerName + "_inputCrop";
cldnn::primitive_id WRconcatID = layerName + "_WRconcat";
// Join weights and recurrent weights once, then reshape to
// [4 * hidden_size, input_size + hidden_size] for reuse by every step's fully_connected.
p.AddPrimitive(cldnn::concatenation(WRconcatID, { weightID, recurrentID }, cldnn::concatenation::concatenation_axis::along_y));
p.AddInnerPrimitiveToProfiler(WRconcatID, op->get_friendly_name(), op);
std::vector<size_t> WRreshapeSize = { 4 * size_t(lstm_hidden_size), size_t(lstm_input_size + lstm_hidden_size) };
cldnn::primitive_id WRreshapeID = WRconcatID + "_reshape";
auto reshapeInPrim = cldnn::reshape(WRreshapeID, WRconcatID, CldnnTensorFromIEDims(WRreshapeSize));
p.AddPrimitive(reshapeInPrim);
p.AddInnerPrimitiveToProfiler(WRreshapeID, op->get_friendly_name(), op);
// Unrolled recurrence: one crop/concat/fc/lstm_elt chain per time step.
for (int i = 0; i < lstm_sequence_len; ++i) {
const std::string id_str = std::to_string(i);
cldnn::primitive_id concatID = layerName + "_inputConcat" + id_str;
cldnn::primitive_id lstm_fc_id = layerName + "_fully_connected" + id_str;
cldnn::primitive_id lstm_fc_resh_id = layerName + "_gemmReshape" + id_str;
cldnn::primitive_id lstm_fc_reor_id = layerName + "_gemmReorder" + id_str;
cldnn::primitive_id lstm_elt_id = layerName + "_lstm_elt" + id_str;
cldnn::primitive_id crop_id = layerName + "_crop" + id_str;
// Position in the input sequence consumed at iteration i (reversed when not forward).
int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;
const std::string seqIdx_str = std::to_string(seqIdx);
// Crop a single time step: same sizes as inputShape except 1 in the sequence component.
cldnn::tensor crop_tensor{ inputShape.batch[0], 1, inputShape.spatial[0], inputShape.spatial[1] };
cldnn::tensor offset_tensor{ 0, static_cast<cldnn::tensor::value_type>(seqIdx), 0, 0 };
cldnn::primitive_id inputCrop_id = inputCropID + ":" + seqIdx_str;
p.AddPrimitive(cldnn::crop(inputCrop_id, permuteID, crop_tensor, offset_tensor));
p.AddInnerPrimitiveToProfiler(inputCrop_id, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::concatenation(concatID, { inputCrop_id, hiddenStr }, cldnn::concatenation::concatenation_axis::along_x));
p.AddInnerPrimitiveToProfiler(concatID, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::fully_connected(lstm_fc_id, concatID, WRreshapeID, biasID));
p.AddInnerPrimitiveToProfiler(lstm_fc_id, op->get_friendly_name(), op);
p.AddPrimitive(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz));
p.AddPrimitive(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout));
// NOTE(review): the literal 0 after `clip` is presumably lstm_elt's input_forget flag - confirm.
p.AddPrimitive(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, cellStr,
clip, 0, activations, activation_params, cldnn::lstm_weights_order::fizo));
p.AddInnerPrimitiveToProfiler(lstm_fc_resh_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_fc_reor_id, op->get_friendly_name(), op);
p.AddInnerPrimitiveToProfiler(lstm_elt_id, op->get_friendly_name(), op);
// From here on hiddenStr / cellStr name this step's outputs, which feed the next iteration.
hiddenStr = crop_id + ":hidden";
cellStr = crop_id + ":cell";
p.AddPrimitive(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
p.AddInnerPrimitiveToProfiler(hiddenStr, op->get_friendly_name(), op);
output_ids_offsets.push_back(hiddenStr);
if (i < lstm_sequence_len - 1) {
// Intermediate step: the cell state is only needed as input to the next step.
p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
} else {
// last hidden state crop (output 2)
cldnn::primitive_id outputHiddenID = layerName + ".1";
p.primitiveIDs[hiddenStr] = hiddenStr;
p.primitiveIDs[outputHiddenID] = hiddenStr;
// last cell state crop (output 3)
p.AddPrimitive(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
cldnn::primitive_id outputCellID = layerName + ".2";
p.AddInnerPrimitiveToProfiler(cellStr, op->get_friendly_name(), op);
p.primitiveIDs[outputCellID] = cellStr;
}
}
// Steps were produced back-to-front for non-forward direction; restore sequence order.
if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());
// concatenated hidden state (output 1)
cldnn::primitive_id outputConcatID = layerName + ".0";
cldnn::primitive_id concatStr = layerName + ":hiddenConcat";
p.AddPrimitive(cldnn::concatenation(concatStr, output_ids_offsets, cldnn::concatenation::along_f));
// Both "<layerName>.0" and the bare layer name resolve to the concatenated hidden states.
p.primitiveIDs[outputConcatID] = concatStr;
p.primitiveIDs[layerName] = concatStr;
p.AddPrimitiveToProfiler(layerName, op);
}
// Factory registrations for the recurrent ops translated in this file.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds each
// op type to its Create<Name>Op function - confirm.
REGISTER_FACTORY_IMPL(v4, LSTMCell);
REGISTER_FACTORY_IMPL(v5, LSTMSequence);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,122 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/roi_pooling.hpp"
#include "ngraph/op/psroi_pooling.hpp"
#include "ngraph/op/deformable_psroi_pooling.hpp"
#include "api/roi_pooling.hpp"
namespace CLDNNPlugin {
// Maps a pooling method name from an nGraph ROI pooling op to cldnn::pooling_mode.
// "bilinear", "max" and "average" map to their namesakes; every other string
// falls through to deformable_bilinear.
static cldnn::pooling_mode GetPoolingMode(std::string method) {
    if (method == "bilinear") {
        return cldnn::pooling_mode::bilinear;
    }
    if (method == "max") {
        return cldnn::pooling_mode::max;
    }
    if (method == "average") {
        return cldnn::pooling_mode::average;
    }
    return cldnn::pooling_mode::deformable_bilinear;
}
// Translates an nGraph v1::DeformablePSROIPooling node into a position-sensitive
// cldnn::roi_pooling primitive.
// The op has either two inputs (no offsets -> no_trans = true) or three inputs
// (third one carries the offsets). All input primitive IDs are forwarded as-is.
// The primitive is registered with the profiler / primitive-ID map like every other
// op translated in this plugin, so that it can be looked up by its layer name.
void CreateDeformablePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v1::DeformablePSROIPooling>& op) {
    p.ValidateInputs(op, {2, 3});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    cldnn::pooling_mode mode = GetPoolingMode(op->get_mode());
    float trans_std = op->get_trans_std();
    int part_size = op->get_part_size();
    // Without the optional third (offsets) input the pooling is non-deformable.
    bool no_trans = op->get_input_size() == 2;

    // temporary workaround due to incorrect usage of group_size in the nGraph operation for the DeformablePSROIPooling
    int pooled_width = op->get_group_size();
    int pooled_height = op->get_group_size();
    int group_size = op->get_group_size();
    int output_dim = op->get_output_dim();
    float spatial_scale = op->get_spatial_scale();
    int spatial_bins_x = op->get_spatial_bins_x();
    int spatial_bins_y = op->get_spatial_bins_y();
    bool position_sensitive = true;

    auto psROIPoolingPrim = cldnn::roi_pooling(layerName,
                                               inputPrimitives,
                                               mode,
                                               position_sensitive,
                                               pooled_width,
                                               pooled_height,
                                               spatial_scale,
                                               trans_std,
                                               no_trans,
                                               part_size,
                                               group_size,
                                               output_dim,
                                               spatial_bins_x,
                                               spatial_bins_y);
    p.AddPrimitive(psROIPoolingPrim);
    // Previously missing: without this call the primitive was never registered for
    // profiling / ID lookup, unlike the sibling PSROIPooling and ROIPooling translators.
    p.AddPrimitiveToProfiler(op);
}
// Translates an nGraph v0::PSROIPooling node (data + ROIs) into a position-sensitive
// cldnn::roi_pooling primitive. The op's group_size is used for both pooled
// dimensions passed to the primitive.
void CreatePSROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::PSROIPooling>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    const cldnn::pooling_mode poolMode = GetPoolingMode(op->get_mode());
    const int groupSize = op->get_group_size();
    const int outputDim = op->get_output_dim();
    const float scale = op->get_spatial_scale();
    const int binsX = op->get_spatial_bins_x();
    const int binsY = op->get_spatial_bins_y();
    const bool positionSensitive = true;

    p.AddPrimitive(cldnn::roi_pooling(primName,
                                      inputs[0],  // input data
                                      inputs[1],  // input rois
                                      poolMode,
                                      positionSensitive,
                                      groupSize,
                                      groupSize,
                                      scale,
                                      outputDim,
                                      binsX,
                                      binsY));
    p.AddPrimitiveToProfiler(op);
}
// Translates an nGraph v0::ROIPooling node (data + ROIs) into a non-position-sensitive
// cldnn::roi_pooling primitive. The op's output size is read as {height, width};
// the primitive receives width first, then height.
void CreateROIPoolingOp(Program& p, const std::shared_ptr<ngraph::op::v0::ROIPooling>& op) {
    p.ValidateInputs(op, {2});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    // Pooled output dimensions.
    auto pooledSize = op->get_output_size();
    const int pooledH = pooledSize[0];
    const int pooledW = pooledSize[1];

    const float scale = op->get_spatial_scale();
    const bool positionSensitive = false;
    const cldnn::pooling_mode poolMode = GetPoolingMode(op->get_method());

    p.AddPrimitive(cldnn::roi_pooling(primName,
                                      inputs[0],  // input data
                                      inputs[1],  // input rois
                                      poolMode,
                                      positionSensitive,
                                      pooledW,
                                      pooledH,
                                      scale));
    p.AddPrimitiveToProfiler(op);
}
// Factory registrations for the ROI pooling family translated in this file.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds each
// op type to its Create<Name>Op function - confirm.
REGISTER_FACTORY_IMPL(v1, DeformablePSROIPooling);
REGISTER_FACTORY_IMPL(v0, PSROIPooling);
REGISTER_FACTORY_IMPL(v0, ROIPooling);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,68 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/scatter_update.hpp"
#include "ngraph/op/constant.hpp"
#include "api/scatter_update.hpp"
namespace CLDNNPlugin {
// Converts an IE-style axis (possibly negative) for a tensor of the given rank into
// the clDNN scatter_update axis enum.
// Negative axes are wrapped by adding `rank`. Axes 0 and 1 (batch, feature) map directly;
// spatial axes are mirrored because clDNN orders spatial dimensions in reverse, with the
// tensor treated as at least 4D.
// Throws if the axis is outside [0, rank) or maps past the last supported clDNN axis.
static inline cldnn::scatter_update::scatter_update_axis GetScatterUpdateAxis(int axis, unsigned rank) {
    if (axis < 0) {
        axis += rank;
    }
    if (!(axis >= 0 && axis < rank))
        THROW_IE_EXCEPTION << "ScatterUpdate axis is not correspond to number of dimensions";

    // Difference in dimension ordering between IE and clDNN:
    // spatial dimensions (everything after batch and feature) are reversed.
    unsigned mappedAxis = axis;
    if (axis >= 2) {
        // Default and minimum number of dimensions is 4
        const unsigned spatialCount = std::max(rank, 4u) - 2;
        const unsigned spatialIndex = static_cast<unsigned>(axis) - 2;
        mappedAxis = 2 + (spatialCount - 1 - spatialIndex);
    }

    // Indexed by the clDNN axis position computed above.
    static const cldnn::scatter_update::scatter_update_axis axisByPosition[] = {
        cldnn::scatter_update::scatter_update_axis::along_b,
        cldnn::scatter_update::scatter_update_axis::along_f,
        cldnn::scatter_update::scatter_update_axis::along_x,
        cldnn::scatter_update::scatter_update_axis::along_y,
        cldnn::scatter_update::scatter_update_axis::along_z,
        cldnn::scatter_update::scatter_update_axis::along_w,
    };
    if (mappedAxis > 5)
        THROW_IE_EXCEPTION << "Unsupported ScatterUpdate axis: " << axis;

    return axisByPosition[mappedAxis];
}
// Translates an nGraph v3::ScatterUpdate node into a cldnn::scatter_update primitive.
// Inputs 0..2 are forwarded as primitives; input 3 (the axis) must be a Constant and its
// first element is converted to a clDNN axis using the rank of input 0.
// Throws if the axis input is not a Constant.
void CreateScatterUpdateOp(Program& p, const std::shared_ptr<ngraph::op::v3::ScatterUpdate>& op) {
    p.ValidateInputs(op, {4});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    const size_t dataRank = op->get_input_shape(0).size();

    auto axisNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(3));
    if (axisNode == nullptr) {
        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }
    const int32_t axisValue = axisNode->cast_vector<int32_t>()[0];

    p.AddPrimitive(cldnn::scatter_update(primName,
                                         inputs[0],
                                         inputs[1],
                                         inputs[2],
                                         GetScatterUpdateAxis(axisValue, dataRank)));
    p.AddPrimitiveToProfiler(op);
}
// Factory registration for v3::ScatterUpdate.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds the
// op type to CreateScatterUpdateOp - confirm.
REGISTER_FACTORY_IMPL(v3, ScatterUpdate);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,85 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/select.hpp"
#include "api/select.hpp"
#include "api/reorder.hpp"
#include "api/reshape.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v1::Select node (three inputs) into a cldnn::select primitive.
// Only NONE and NUMPY auto-broadcast modes are accepted. For NUMPY, every input is
// pre-processed: a reorder is added when the default format for its rank differs from the
// output's, and a reshape prepends ones so the input rank matches the output rank.
// Throws on any other broadcast type.
void CreateSelectOp(Program& p, const std::shared_ptr<ngraph::op::v1::Select>& op) {
    p.ValidateInputs(op, {3});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto outShape = op->get_output_shape(0);
    auto outRank = outShape.size();

    auto broadcast_type = op->get_auto_broadcast();
    const bool noBroadcast = broadcast_type.m_type == ngraph::op::AutoBroadcastType::NONE;
    const bool numpyBroadcast = broadcast_type.m_type == ngraph::op::AutoBroadcastType::NUMPY;
    if (!(noBroadcast || numpyBroadcast)) {
        THROW_IE_EXCEPTION << "Unsupported broadcast type (" << broadcast_type.m_type << ") in layer " + op->get_friendly_name();
    }

    if (numpyBroadcast) {
        // Bring each input to the output's rank and format.
        for (size_t idx = 0; idx < inputPrimitives.size(); ++idx) {
            auto inShape = op->get_input_shape(idx);
            auto inRank = inShape.size();
            const std::string helperPrefix = layerName + "_cldnn_in" + std::to_string(idx);

            // A different rank may imply a different default format; reorder if so.
            auto wantedFormat = DefaultFormatForDims(outRank);
            if (wantedFormat.value != DefaultFormatForDims(inRank).value) {
                auto reorderId = helperPrefix + "_reorder";
                auto inputType = DataTypeFromPrecision(op->get_input_element_type(idx));
                p.AddPrimitive(cldnn::reorder(reorderId, inputPrimitives[idx], wantedFormat, inputType));
                p.AddInnerPrimitiveToProfiler(reorderId, layerName, op);
                inputPrimitives[idx] = reorderId;
            }

            // Reshape when ranks differ, or when the rank is below 4.
            if (inRank != outRank || inRank < 4) {
                auto reshapeId = helperPrefix + "_reshape";
                // Left-pad the input shape with ones up to the output rank.
                inShape.insert(inShape.begin(), outRank - inRank, 1ul);
                auto paddedTensor = CldnnTensorFromIEDims(inShape);
                p.AddPrimitive(cldnn::reshape(reshapeId, inputPrimitives[idx], paddedTensor));
                p.AddInnerPrimitiveToProfiler(reshapeId, layerName, op);
                inputPrimitives[idx] = reshapeId;
            }
        }
    }

    std::string bc_string = numpyBroadcast ? "numpy" : "none";

    p.AddPrimitive(cldnn::select(layerName,
                                 inputPrimitives[0],
                                 inputPrimitives[1],
                                 inputPrimitives[2],
                                 cldnn::padding(),
                                 bc_string));
    p.AddPrimitiveToProfiler(op);
}
// Factory registration for v1::Select.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds the
// op type to CreateSelectOp - confirm.
REGISTER_FACTORY_IMPL(v1, Select);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,47 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/shuffle_channels.hpp"
#include "api/shuffle_channels.hpp"
namespace CLDNNPlugin {
// Translates an nGraph v0::ShuffleChannels node into a cldnn::shuffle_channels primitive.
// A negative axis is wrapped by adding the input rank before validation.
// Throws when the axis is out of range, when group < 1, or when the dimension selected
// by the axis is not divisible by group. Each diagnostic reports the value that actually
// failed the check (the original messages printed the wrong variable).
void CreateShuffleChannelsOp(Program& p, const std::shared_ptr<ngraph::op::v0::ShuffleChannels>& op) {
    p.ValidateInputs(op, {1, 2});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto in_rank = op->get_input_shape(0).size();

    int32_t group = op->get_group();
    int32_t axis = op->get_axis();

    if (axis < 0)
        axis += in_rank;

    // axis is known to be non-negative on the right-hand side, so the cast is safe.
    if (axis < 0 || static_cast<size_t>(axis) >= in_rank)
        THROW_IE_EXCEPTION << "Incorrect axis value! Actual axis is " << std::to_string(op->get_axis());

    if (group < 1)
        THROW_IE_EXCEPTION << "Invalid group size value (should equal at least one). Actual group size is " << std::to_string(group);

    if (op->get_input_shape(0)[axis] % group != 0)
        THROW_IE_EXCEPTION << "Group parameter must evenly divide the channel dimension. Actual group size is " << std::to_string(group);

    auto shuffleChannelsPrim = cldnn::shuffle_channels(layerName,
                                                       inputPrimitives[0],
                                                       group,
                                                       axis);

    p.AddPrimitive(shuffleChannelsPrim);
    p.AddPrimitiveToProfiler(op);
}
// Factory registration for v0::ShuffleChannels.
// NOTE(review): REGISTER_FACTORY_IMPL is defined outside this view; presumably it binds the
// op type to CreateShuffleChannelsOp - confirm.
REGISTER_FACTORY_IMPL(v0, ShuffleChannels);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,74 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/log_softmax.hpp"
#include "api/softmax.hpp"
#include "api/activation.hpp"
namespace CLDNNPlugin {
// Maps a non-negative softmax axis on a tensor of the given rank to the clDNN
// softmax normalisation dimension. For rank > 4 the spatial axes are z, y, x
// (axes 2, 3, 4); otherwise axes 2 and 3 are y and x.
// Throws for any axis outside 0..4.
static cldnn::softmax::dimension_t GetSoftmaxAxis(int64_t axis, size_t rank) {
    const bool moreThan4D = rank > 4;

    // FIXME: it seems that axis=0 should correspond to normalize_b;
    if (axis == 0) {
        return cldnn::softmax::normalize_all;
    }
    if (axis == 1) {
        return cldnn::softmax::normalize_f;
    }
    if (axis == 2) {
        return moreThan4D ? cldnn::softmax::normalize_z : cldnn::softmax::normalize_y;
    }
    if (axis == 3) {
        return moreThan4D ? cldnn::softmax::normalize_y : cldnn::softmax::normalize_x;
    }
    if (axis == 4) {
        return cldnn::softmax::normalize_x;
    }

    THROW_IE_EXCEPTION << "Invalid softmax axis " << axis;

    // Not reached when the statement above throws; kept so every path returns a value.
    return cldnn::softmax::normalize_fyx;
}
// Translates an nGraph v1::Softmax node into a cldnn::softmax primitive, converting
// the op's axis with GetSoftmaxAxis using the rank of input 0.
void CreateSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v1::Softmax>& op) {
    p.ValidateInputs(op, {1});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string primName = layer_type_name_ID(op);

    p.AddPrimitive(cldnn::softmax(primName,
                                  inputs[0],
                                  GetSoftmaxAxis(op->get_axis(), op->get_input_shape(0).size())));
    p.AddPrimitiveToProfiler(op);
}
// Translates an nGraph v5::LogSoftmax node into two clDNN primitives:
// a softmax named "<layer>_softmax" followed by a log activation that carries the
// layer's own name. A negative axis is wrapped by adding the rank of input 0.
void CreateLogSoftmaxOp(Program& p, const std::shared_ptr<ngraph::op::v5::LogSoftmax>& op) {
    p.ValidateInputs(op, {1});
    auto inputs = p.GetInputPrimitiveIDs(op);
    std::string logName = layer_type_name_ID(op);
    std::string softmaxName = logName + "_softmax";

    const auto inputRank = op->get_input_shape(0).size();
    auto normalizedAxis = op->get_axis();
    if (normalizedAxis < 0) {
        normalizedAxis += inputRank;
    }

    auto softmaxStage = cldnn::softmax(softmaxName,
                                       inputs[0],
                                       GetSoftmaxAxis(static_cast<size_t>(normalizedAxis), inputRank));
    auto logStage = cldnn::activation(logName, softmaxName, cldnn::activation_func::log);

    p.AddPrimitive(softmaxStage);
    p.AddPrimitive(logStage);

    // Both stages are registered with the profiler under the same op.
    p.AddPrimitiveToProfiler(softmaxName, op);
    p.AddPrimitiveToProfiler(logName, op);
}
REGISTER_FACTORY_IMPL(v1, Softmax);
REGISTER_FACTORY_IMPL(v5, LogSoftmax);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,53 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/space_to_batch.hpp"
#include "ngraph/op/constant.hpp"
#include "api/space_to_batch.hpp"
namespace CLDNNPlugin {
// Creates a clDNN space_to_batch primitive for ngraph v1::SpaceToBatch.
// Inputs 1..3 must be constants; each is read as int32 values, padded up to the
// rank of input 0 (with 1 for input 1 and 0 for inputs 2 and 3) and turned into
// a cldnn::tensor. A non-constant input raises an IE exception.
void CreateSpaceToBatchOp(Program& p, const std::shared_ptr<ngraph::op::v1::SpaceToBatch>& op) {
    p.ValidateInputs(op, {4});
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string primitiveId = layer_type_name_ID(op);

    auto rank = op->get_input_shape(0).size();
    auto format = DefaultFormatForDims(rank);

    std::vector<cldnn::tensor> constTensors;
    constTensors.reserve(3);

    for (size_t port = 1; port <= 3; ++port) {
        auto constNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(port));
        if (!constNode)
            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";

        // Input 1 is padded with 1, the two remaining inputs with 0.
        int32_t fillValue = (port == 1) ? 1 : 0;

        std::vector<int32_t> values = constNode->cast_vector<int32_t>();
        if (values.size() < rank)
            values.resize(rank, fillValue);

        constTensors.emplace_back(format, values, fillValue);
    }

    auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));

    // NOTE(review): the original labelled the last two tensors "crops_begin"/"crops_end";
    // for SpaceToBatch these are presumably pads_begin/pads_end - confirm against the op spec.
    auto spaceToBatchPrim = cldnn::space_to_batch(primitiveId,
                                                  inputIds[0],      // input
                                                  constTensors[0],  // block_shape (input 1)
                                                  constTensors[1],  // input 2
                                                  constTensors[2],  // input 3
                                                  outTensor);

    p.AddPrimitive(spaceToBatchPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, SpaceToBatch);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,38 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/space_to_depth.hpp"
#include "api/space_to_depth.hpp"
namespace CLDNNPlugin {
// Converts the ngraph SpaceToDepth mode into the clDNN depth_mode value.
// Any mode other than BLOCKS_FIRST / DEPTH_FIRST raises an IE exception.
static cldnn::space_to_depth::depth_mode GetDepthMode(ngraph::op::v0::SpaceToDepth::SpaceToDepthMode mode) {
    using NGraphMode = ngraph::op::v0::SpaceToDepth::SpaceToDepthMode;

    if (mode == NGraphMode::BLOCKS_FIRST)
        return cldnn::space_to_depth::blocks_first;
    if (mode == NGraphMode::DEPTH_FIRST)
        return cldnn::space_to_depth::depth_first;

    THROW_IE_EXCEPTION << "Unsupported SpaceToDepthMode value: " << static_cast<int>(mode);

    // Fallback return kept after the throw statement so every path yields a value.
    return cldnn::space_to_depth::blocks_first;
}
// Creates a clDNN space_to_depth primitive for ngraph v0::SpaceToDepth,
// forwarding the op's mode and block size.
void CreateSpaceToDepthOp(Program& p, const std::shared_ptr<ngraph::op::v0::SpaceToDepth>& op) {
    p.ValidateInputs(op, {1});
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string primitiveId = layer_type_name_ID(op);

    const auto depthMode = GetDepthMode(op->get_mode());
    const auto blockSize = op->get_block_size();

    auto spaceToDepthPrim = cldnn::space_to_depth(primitiveId, inputIds[0], depthMode, blockSize);
    p.AddPrimitive(spaceToDepthPrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, SpaceToDepth);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,73 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/split.hpp"
#include "ngraph/op/variadic_split.hpp"
#include "api/crop.hpp"
namespace CLDNNPlugin {
// Shared implementation for Split and VariadicSplit: every output of the op is
// emitted as a clDNN crop of input 0, and the split node itself is reported to
// the profiler as optimized out.
//
// startOffset tracks, per dimension, where the next crop begins. After each output
// it is advanced along every dimension whose output extent differs from the input
// extent.
//
// Throws an IE exception when an output's rank differs from the input rank or when
// an output would extend past the input along any dimension.
void CreateCommonSplitOp(Program& p, const std::shared_ptr<ngraph::Node>& op) {
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    auto inputDims = op->get_input_shape(0);
    InferenceEngine::SizeVector startOffset(inputDims.size());

    // A single-output split keeps the plain layer name; otherwise outputs are named "<layer>.<index>".
    bool is_single_out_split = op->get_output_size() == 1;

    for (size_t i = 0; i < op->get_output_size(); i++) {
        std::string outLayerName = layerName + (is_single_out_split ? "" : "." + std::to_string(i));
        const auto outLayerDims = op->get_output_shape(i);
        if (outLayerDims.size() != startOffset.size()) {
            THROW_IE_EXCEPTION << "Invalid dimesions in split layer: " << op->get_friendly_name()
                               << " output: " << op->get_output_tensor_name(i);
        }
        // The dimension index must not shadow the output index `i`: the error message
        // below has to name the offending output, not an output picked by dimension.
        for (size_t d = 0; d < inputDims.size(); d++) {
            if ((outLayerDims[d] + startOffset[d]) > inputDims[d]) {
                THROW_IE_EXCEPTION << "Invalid dimesions in split layer: " << op->get_friendly_name()
                                   << " output: " << op->get_output_tensor_name(i);
            }
        }

        auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
        auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);

        auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
        p.primitivesToIRLayersMap[outLayerName] = { op->get_friendly_name() };
        p.primitiveIDs[outLayerName] = outLayerName;
        p.AddPrimitive(cropPrim);
        p.profilingIDs.push_back(outLayerName);
        p.InitProfileInfo(outLayerName, "Crop");

        // Advance the offset along every dimension that was actually split.
        for (size_t d = 0; d < inputDims.size(); d++) {
            if (outLayerDims[d] != inputDims[d]) {
                startOffset[d] += outLayerDims[d];
            }
        }
    }

    // set split as not_run
    p.InitProfileInfo(op->get_friendly_name(), op->get_type_name(), false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
}
// Creates ngraph v1::Split: validates that the op has 2 inputs and delegates
// to CreateCommonSplitOp.
void CreateSplitOp(Program& p, const std::shared_ptr<ngraph::op::v1::Split>& op) {
p.ValidateInputs(op, {2});
CreateCommonSplitOp(p, op);
}
// Creates ngraph v1::VariadicSplit: validates that the op has 3 inputs and
// delegates to CreateCommonSplitOp.
void CreateVariadicSplitOp(Program& p, const std::shared_ptr<ngraph::op::v1::VariadicSplit>& op) {
p.ValidateInputs(op, {3});
CreateCommonSplitOp(p, op);
}
REGISTER_FACTORY_IMPL(v1, Split);
REGISTER_FACTORY_IMPL(v1, VariadicSplit);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,276 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/strided_slice.hpp"
#include "ngraph/op/constant.hpp"
#include "api/strided_slice.hpp"
#include "api/reshape.hpp"
#include "api/crop.hpp"
namespace CLDNNPlugin {
// Translates ngraph v1::StridedSlice into clDNN primitives.
//
// Fast path (the do { ... } while (false) block; every `break` directly inside it
// falls through to the generic path): when begin/end/stride are constants, the input
// shape is static, begin_mask and end_mask contain only zeros, every stride equals 1
// and the processed slice has exactly 4 axes, the op is emitted as a crop, preceded by
// a reshape when new axes are requested and followed by a reshape when axes are shrunk.
//
// Generic path: a cldnn::strided_slice primitive fed by all four inputs, with
// begin/end masks inverted as the primitive expects.
void CreateStridedSliceOp(Program& p, const std::shared_ptr<ngraph::op::v1::StridedSlice>& op) {
    p.ValidateInputs(op, {4});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    do {
        auto begin_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(1).get_node_shared_ptr());
        auto end_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(2).get_node_shared_ptr());
        auto stride_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->input_value(3).get_node_shared_ptr());

        auto partial_input_shape = op->get_input_partial_shape(0);

        if (!begin_node || !end_node || !stride_node || partial_input_shape.is_dynamic()) {
            break;
        }

        // Any set begin/end mask bit must route the op to the generic path: the crop
        // offset pushed for a regular axis below is taken from `begin` before begin_mask
        // is applied. A `break` placed inside the range-for would only leave that loop,
        // so the result is collected in a flag and checked afterwards.
        bool valid_mask = true;
        for (auto& m : op->get_begin_mask()) {
            if (m != 0)
                valid_mask = false;
        }
        for (auto& m : op->get_end_mask()) {
            if (m != 0)
                valid_mask = false;
        }
        if (!valid_mask)
            break;

        auto input_shape = op->get_input_shape(0);
        auto output_shape = op->get_output_shape(0);

        auto begin = begin_node->cast_vector<int64_t>();
        auto end = end_node->cast_vector<int64_t>();
        auto strides = stride_node->cast_vector<int64_t>();

        // Only unit strides can be expressed as a crop.
        bool ones_stride = true;
        for (auto & s : strides) {
            if (s != 1)
                ones_stride = false;
        }

        if (!ones_stride)
            break;

        // Collects the indices of all mask entries equal to 1.
        auto convert_to_set = [](const std::vector<int64_t>& mask) {
            ngraph::AxisSet axis_set{};
            for (size_t i = 0; i < static_cast<size_t>(mask.size()); ++i) {
                if (mask[i] == 1) {
                    axis_set.emplace(i);
                }
            }
            return axis_set;
        };

        auto shrink_axis_mask = convert_to_set(op->get_shrink_axis_mask());
        auto new_axis_mask = convert_to_set(op->get_new_axis_mask());
        auto ellipsis_mask = convert_to_set(op->get_ellipsis_mask());
        auto begin_mask = convert_to_set(op->get_begin_mask());
        auto end_mask = convert_to_set(op->get_end_mask());

        // reshape_pattern: shape of the (optionally reshaped) crop input;
        // offset / dim: per-axis crop start and extent; axes: running axis ids.
        std::vector<size_t> reshape_pattern,
                            axes,
                            offset,
                            dim;

        size_t input_shape_idx = 0;
        uint64_t uniq_id = 0;
        for (size_t axis = 0; axis < begin.size(); ++axis) {
            // add dimensions hidden under the ellipsis mask if ellipsis mask is set
            if (ellipsis_mask.count(axis)) {
                // only one bit in ellipsis mask is allowed
                int num_new_axis_after_ellipses = 0;
                int num_input_axis_before_ellipses = 0;
                for (size_t i = 0; i < axis; ++i) {
                    if (!new_axis_mask.count(i))
                        num_input_axis_before_ellipses++;
                }
                for (size_t i = axis + 1; i < begin.size(); ++i) {
                    if (new_axis_mask.count(i))
                        num_new_axis_after_ellipses++;
                }

                // -1 because it's a position of ellipses
                unsigned long num_input_axis_after_ellipses = (begin.size() - axis - num_new_axis_after_ellipses - 1);
                unsigned long num_of_hidden_dims = input_shape.size() - num_input_axis_after_ellipses
                                                   - num_input_axis_before_ellipses;
                for (size_t i = 0; i < num_of_hidden_dims; ++i) {
                    axes.emplace_back(uniq_id);
                    uniq_id++;
                    reshape_pattern.emplace_back(input_shape[input_shape_idx]);
                    offset.emplace_back(0);

                    dim.emplace_back(input_shape[input_shape_idx]);
                    input_shape_idx++;
                }
            } else {
                // add new single dimension if new_axis_mask is set
                if (new_axis_mask.count(axis)) {
                    reshape_pattern.emplace_back(1);
                    dim.emplace_back(1);
                    offset.emplace_back(0);
                } else if (shrink_axis_mask.count(axis)) {
                    // skip this dimension if shrink_axis_mask is set (input_shape_idx++)
                    dim.emplace_back(1);
                    offset.emplace_back(begin_mask.count(axis) ? 0 : begin[axis]);
                    reshape_pattern.emplace_back(1);
                    input_shape_idx++;
                } else {
                    // calculate dimension using begin, end, begin_mask, end_mask, stride
                    reshape_pattern.emplace_back(input_shape[input_shape_idx]);

                    int64_t lb = begin[axis];
                    int64_t ub = end[axis];

                    // convert negative indexes to positive
                    if (lb < 0)
                        lb = std::max(static_cast<int64_t>(input_shape[input_shape_idx]) + lb,
                                      static_cast<int64_t>(0));
                    if (ub < 0)
                        ub = std::max(static_cast<int64_t>(input_shape[input_shape_idx]) + ub,
                                      static_cast<int64_t>(0));

                    // apply restrictions when begin or end values more/less than max/min possible values.
                    lb = std::min(static_cast<int64_t>(input_shape[input_shape_idx]), lb);
                    ub = std::min(static_cast<int64_t>(input_shape[input_shape_idx]), ub);

                    offset.emplace_back(lb);

                    // set default value for stride or use given value
                    int64_t stride = 1;
                    if (strides.size() > axis)
                        stride = strides[axis];

                    int64_t dimension = 0;
                    if (stride < 0) {
                        // apply masks
                        if (begin_mask.count(axis))
                            lb = static_cast<int64_t>(input_shape[input_shape_idx]) - 1;
                        if (end_mask.count(axis))
                            ub = -1;

                        lb = std::min(lb, static_cast<int64_t>(input_shape[input_shape_idx]) - 1);
                        lb -= 1;  // we always get 1st element, so we need decrease range
                        if (ub <= lb)
                            dimension = (ub - lb) / stride + 1;
                    } else {
                        // apply masks
                        if (begin_mask.count(axis))
                            lb = 0;
                        if (end_mask.count(axis))
                            ub = static_cast<int64_t>(input_shape[input_shape_idx]);

                        lb += 1;  // we always get 1st element, so we need decrease range
                        if (ub >= lb)
                            dimension = (ub - lb) / stride + 1;
                    }

                    dim.emplace_back(dimension);
                    input_shape_idx++;
                }
                axes.emplace_back(uniq_id);
                uniq_id++;
            }
        }

        // Input dimensions not covered by begin/end are taken whole.
        for (; input_shape_idx < input_shape.size(); ++input_shape_idx) {
            reshape_pattern.emplace_back(input_shape[input_shape_idx]);
            offset.emplace_back(0);
            dim.emplace_back(input_shape[input_shape_idx]);
            axes.emplace_back(uniq_id);
            uniq_id++;
        }

        // The crop-based lowering is only used for exactly 4 axes.
        if (axes.size() != 4) {
            break;
        }

        auto inPrimitive = inputPrimitives[0];
        // Reshape in case of new axis
        if (!new_axis_mask.empty()) {
            auto targetShape = CldnnTensorFromIEDims(reshape_pattern);
            auto reshapeInName = op->get_friendly_name() + "/Reshape_before";
            auto reshapePrim = cldnn::reshape(reshapeInName, inputPrimitives[0], targetShape);
            p.AddPrimitive(reshapePrim);
            p.AddInnerPrimitiveToProfiler(reshapeInName, layerName, op);
            inPrimitive = reshapeInName;
        }

        // Validate the axis ids and write the sliced extents into the crop shape.
        ngraph::Shape crop_shape(reshape_pattern);
        for (size_t i = 0; i < axes.size(); ++i) {
            if (axes[i] > 3) {
                THROW_IE_EXCEPTION << "Invalid crop axis: " << std::to_string(axes[i]) << " in op " + op->get_friendly_name();
            }
            crop_shape[axes[i]] = dim[i];
        }

        cldnn::tensor refSize = CldnnTensorFromIEDims(crop_shape);
        cldnn::tensor offSize = CldnnTensorFromIEDims(offset, 0);

        auto cropPrim = cldnn::crop(layerName, inPrimitive, refSize, offSize);
        p.AddPrimitive(cropPrim);
        p.AddPrimitiveToProfiler(layerName, op);

        // Reshape in case of deleting of axis
        if (!shrink_axis_mask.empty()) {
            auto targetShape = CldnnTensorFromIEDims(output_shape);
            auto reshapeOutName = op->get_friendly_name() + "/Crop";
            auto reshapePrim = cldnn::reshape(reshapeOutName, layerName, targetShape);
            p.AddPrimitive(reshapePrim);
            p.AddInnerPrimitiveToProfiler(reshapeOutName, layerName, op);
        }
        return;
    } while (false);

    // Generic path: hand all inputs and masks to the strided_slice primitive.
    auto end_mask_ = op->get_end_mask();
    auto begin_mask_ = op->get_begin_mask();
    auto new_axis_mask_ = op->get_new_axis_mask();
    auto shrink_axis_mask_ = op->get_shrink_axis_mask();
    std::vector<uint8_t> begin_mask(begin_mask_.begin(), begin_mask_.end());
    std::vector<uint8_t> end_mask(end_mask_.begin(), end_mask_.end());
    std::vector<uint8_t> new_axis_mask(new_axis_mask_.begin(), new_axis_mask_.end());
    std::vector<uint8_t> shrink_axis_mask(shrink_axis_mask_.begin(), shrink_axis_mask_.end());

    // Plugin requires inverted mask values. Consider changing primitive impl to be aligned with the spec.
    for (auto& b : begin_mask) {
        b = 1 - b;
    }
    for (auto& e : end_mask) {
        e = 1 - e;
    }

    auto out_size = CldnnTensorFromIEDims(op->get_output_shape(0));

    auto stridedSlicePrim = cldnn::strided_slice(layerName,
                                                 inputPrimitives[0],
                                                 inputPrimitives[1],
                                                 inputPrimitives[2],
                                                 inputPrimitives[3],
                                                 begin_mask,
                                                 end_mask,
                                                 new_axis_mask,
                                                 shrink_axis_mask,
                                                 out_size);

    p.AddPrimitive(stridedSlicePrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, StridedSlice);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,29 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/tile.hpp"
#include "api/tile.hpp"
namespace CLDNNPlugin {
// Creates a clDNN tile primitive for ngraph v0::Tile. Only input 0 is wired into
// the primitive; the result size is taken from the op's output shape.
void CreateTileOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tile>& op) {
    p.ValidateInputs(op, {2});
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string primitiveId = layer_type_name_ID(op);

    auto outTensor = CldnnTensorFromIEDims(op->get_output_shape(0));

    auto tilePrim = cldnn::tile(primitiveId, inputIds[0], outTensor);
    p.AddPrimitive(tilePrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v0, Tile);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,123 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "cldnn_common_utils.h"
#include "ngraph/op/topk.hpp"
#include "api/arg_max_min.hpp"
#include "api/mutable_data.hpp"
namespace CLDNNPlugin {
// Maps a TopK axis onto the clDNN arg_max_min axis name.
// A negative axis within [-in_rank, -1] is first shifted by in_rank. Rank 5 uses the
// batch/feature/z/y/x table, every other rank the batch/feature/y/x table.
// An axis that is not covered by the selected table yields `batch`.
static cldnn::arg_max_min::axis_name GetAxis(int32_t axis, size_t in_rank) {
    using axis_name = cldnn::arg_max_min::axis_name;

    static const axis_name kAxes5D[] = {
        axis_name::batch, axis_name::feature, axis_name::z, axis_name::y, axis_name::x
    };
    static const axis_name kAxes4D[] = {
        axis_name::batch, axis_name::feature, axis_name::y, axis_name::x
    };

    const int32_t signedRank = static_cast<int32_t>(in_rank);
    if (axis < 0 && axis >= -signedRank)
        axis += signedRank;

    const bool fiveDims = (in_rank == 5);
    const axis_name* table = fiveDims ? kAxes5D : kAxes4D;
    const int32_t tableSize = fiveDims ? 5 : 4;

    if (axis >= 0 && axis < tableSize)
        return table[axis];

    return axis_name::batch;
}
// Creates clDNN arg_max_min primitive(s) for ngraph v1::TopK.
//
// With two outputs, the second output is produced through a shared memory buffer:
// a mutable_data primitive ("<layer>_md_write") is appended to the arg_max_min inputs,
// the arg_max_min itself is named "<layer>.0", and a second mutable_data ("<layer>.1")
// that depends on it exposes the same buffer. An i64 precision of output 1 is replaced
// by i32 for that buffer. With one output a single arg_max_min is created.
// Any other number of outputs raises an IE exception.
void CreateTopKOp(Program& p, const std::shared_ptr<ngraph::op::v1::TopK>& op) {
    p.ValidateInputs(op, {2});
    auto inputPrimitives = p.GetInputPrimitiveIDs(op);
    std::string layerName = layer_type_name_ID(op);

    const cldnn::arg_max_min::out_type otype =
        (op->get_mode() == ngraph::op::v1::TopK::Mode::MAX) ? cldnn::arg_max_min::out_type::max
                                                            : cldnn::arg_max_min::out_type::min;

    const cldnn::arg_max_min::sort_type stype =
        (op->get_sort_type() == ngraph::op::v1::TopK::SortType::SORT_VALUES) ? cldnn::arg_max_min::sort_type::sort_by_values
                                                                             : cldnn::arg_max_min::sort_type::sort_by_indices;

    uint32_t top_k = op->get_k();
    cldnn::arg_max_min::axis_name chosen_axis = GetAxis(static_cast<int32_t>(op->get_axis()),
                                                        op->get_input_shape(0).size());

    const auto outputsCount = op->get_output_size();
    if (outputsCount == 2) {
        // Layout of the buffer shared between the two mutable_data primitives.
        auto sharedPrecision = op->get_output_element_type(1);
        if (sharedPrecision == ngraph::element::i64) {
            sharedPrecision = ngraph::element::i32;
        }

        cldnn::layout sharedLayout = cldnn::layout(DataTypeFromPrecision(sharedPrecision),
                                                   DefaultFormatForDims(op->get_output_shape(1).size()),
                                                   CldnnTensorFromIEDims(op->get_output_shape(1)));

        auto sharedMemory = cldnn::memory::allocate(p.GetEngine(), sharedLayout);

        // Writer side: passed to arg_max_min as an additional input.
        cldnn::primitive_id writeId = layer_type_name_ID(op) + "_md_write";
        auto writePrim = cldnn::mutable_data(writeId, sharedMemory);
        p.primitivesToIRLayersMap[writeId] = {op->get_friendly_name()};
        p.primitiveIDs[writeId] = writeId;
        p.AddPrimitive(writePrim);
        inputPrimitives.push_back(writeId);

        std::string argMaxId = layerName + ".0";
        auto argmaxPrim = cldnn::arg_max_min(argMaxId,
                                             inputPrimitives,
                                             otype,
                                             top_k,
                                             chosen_axis,
                                             stype,
                                             true,
                                             cldnn::padding({0, 0, 0, 0}, 0),
                                             DataTypeFromPrecision(op->get_output_element_type(0)));

        p.AddPrimitive(argmaxPrim);

        // Reader side: depends on arg_max_min and exposes the same buffer as "<layer>.1".
        cldnn::primitive_id readId = layerName + ".1";
        auto readPrim = cldnn::mutable_data(readId, {argMaxId}, sharedMemory);
        p.primitivesToIRLayersMap[readId] = {op->get_friendly_name()};
        p.primitiveIDs[readId] = readId;
        p.AddPrimitive(readPrim);

        p.InitProfileInfo(argMaxId, layer_type_lower(op));
        p.AddPrimitiveToProfiler(argMaxId, op);
    } else if (outputsCount == 1) {
        auto argmaxPrim = cldnn::arg_max_min(layerName,
                                             inputPrimitives,
                                             otype,
                                             top_k,
                                             chosen_axis,
                                             stype,
                                             true,
                                             cldnn::padding({0, 0, 0, 0}, 0),
                                             DataTypeFromPrecision(op->get_output_element_type(0)));

        p.AddPrimitive(argmaxPrim);
        p.AddPrimitiveToProfiler(op);
    } else {
        THROW_IE_EXCEPTION << op->get_friendly_name() << " Incorrect TopK outputs number";
    }
}
REGISTER_FACTORY_IMPL(v1, TopK);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,80 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "ngraph/op/transpose.hpp"
#include "ngraph/op/constant.hpp"
#include "api/permute.hpp"
namespace CLDNNPlugin {
// Returns a copy of `ie_order` padded with `value_to_align` up to at least 4 entries,
// with the entries from index 2 onwards reversed (index 2 swaps with the last entry,
// index 3 with the one before it, and so on). Entries 0 and 1 keep their position.
//
// Padding uses a loop so that inputs shorter than 3 entries also reach 4 entries;
// this guarantees `size - 2` below cannot underflow.
template<class Type>
std::vector<Type> GetPermuteOrder(const std::vector<Type>& ie_order, Type value_to_align = 0) {
    static_assert(std::is_integral<Type>::value, "Integral required.");
    std::vector<Type> cldnn_order = ie_order;

    // 1. Align to min. 4 sizes
    while (cldnn_order.size() < 4)
        cldnn_order.push_back(value_to_align);

    // 2. Swap spatial positions
    const size_t order_size = cldnn_order.size();
    for (size_t i = 0; i < (order_size - 2) / 2; i++) {
        std::swap(cldnn_order[2 + i], cldnn_order[order_size - 1 - i]);
    }

    return cldnn_order;
}
// Creates a clDNN permute primitive for ngraph v1::Transpose.
//
// The order is read from the constant second input when present (a non-constant
// second input raises an IE exception). An empty order is replaced by the reversed
// sequence rank-1 .. 0, where rank is max(4, input rank); a shorter order is extended
// with identity entries up to that rank. The order is then converted to the clDNN
// axis convention before the primitive is built.
void CreateTransposeOp(Program& p, const std::shared_ptr<ngraph::op::v1::Transpose>& op) {
    p.ValidateInputs(op, {1, 2});
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string primitiveId = layer_type_name_ID(op);

    std::vector<uint16_t> order;
    if (op->get_input_size() == 2) {
        auto orderNode = std::dynamic_pointer_cast<ngraph::op::Constant>(op->get_input_node_shared_ptr(1));
        if (!orderNode) {
            THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
        order = orderNode->cast_vector<uint16_t>();
    }

    const int alignedRank = std::max(4, static_cast<int>(op->get_input_shape(0).size()));

    // No explicit order: use the fully reversed order over the aligned rank.
    if (order.empty()) {
        for (int axis = alignedRank; axis-- > 0;)
            order.push_back(static_cast<uint16_t>(axis));
    }

    // Order shorter than the aligned rank: the remaining axes map to themselves.
    while (order.size() < static_cast<size_t>(alignedRank))
        order.push_back(static_cast<uint16_t>(order.size()));

    /*
        Because of the cldnn ordering: bfxy, and IE ordering: bfyx
        we need to adjust the permute order.
    */
    // 1. Switch permute order values for spatial dims (values >= 2 are mirrored).
    const size_t orderSize = order.size();
    std::vector<uint16_t> cldnnOrder;
    for (size_t idx = 0; idx < orderSize; ++idx) {
        const uint16_t axis = order[idx];
        cldnnOrder.push_back(axis >= 2 ? static_cast<uint16_t>(1 + orderSize - axis) : axis);
    }
    // 2. Reorder the spatial positions themselves.
    cldnnOrder = GetPermuteOrder(cldnnOrder);

    auto permutePrim = cldnn::permute(primitiveId, inputIds[0], cldnnOrder);
    p.AddPrimitive(permutePrim);
    p.AddPrimitiveToProfiler(op);
}
REGISTER_FACTORY_IMPL(v1, Transpose);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,312 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "cldnn_program.h"
#include "transformations/utils/utils.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/op/elu.hpp"
#include "ngraph/op/sigmoid.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/prelu.hpp"
#include "ngraph/op/clamp.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/not.hpp"
#include "ngraph/op/asin.hpp"
#include "ngraph/op/asinh.hpp"
#include "ngraph/op/acos.hpp"
#include "ngraph/op/acosh.hpp"
#include "ngraph/op/atan.hpp"
#include "ngraph/op/atanh.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/ceiling.hpp"
#include "ngraph/op/erf.hpp"
#include "ngraph/op/hard_sigmoid.hpp"
#include "ngraph/op/log.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/selu.hpp"
#include "ngraph/op/softplus.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/sin.hpp"
#include "ngraph/op/sinh.hpp"
#include "ngraph/op/cos.hpp"
#include "ngraph/op/cosh.hpp"
#include "ngraph/op/swish.hpp"
#include "ngraph/op/hswish.hpp"
#include "ngraph/op/mish.hpp"
#include "ngraph/op/gelu.hpp"
#include "ngraph/op/sign.hpp"
#include "ngraph/op/hsigmoid.hpp"
#include "ngraph/op/round.hpp"
#include "api/activation.hpp"
namespace CLDNNPlugin {
// Shared helper for unary element-wise ops: creates a clDNN activation primitive
// on the op's first input with the given function and additional parameters, then
// registers the op with the profiler.
void CreateUnaryEltwiseOp(Program& p, const std::shared_ptr<ngraph::Node>& op,
                          cldnn::activation_func func, cldnn::activation_additional_params params) {
    auto inputIds = p.GetInputPrimitiveIDs(op);
    std::string primitiveId = layer_type_name_ID(op);

    auto activationPrim = cldnn::activation(primitiveId, inputIds[0], func, params);

    p.AddPrimitive(activationPrim);
    p.AddPrimitiveToProfiler(op);
}
// Creates ngraph v0::Tanh as a clDNN hyperbolic_tan activation without extra parameters.
void CreateTanhOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tanh>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hyperbolic_tan, {});
}
// Creates ngraph v0::Elu as a clDNN elu activation; the op's alpha (converted to float)
// is passed as the first additional parameter.
void CreateEluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Elu>& op) {
    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::elu, {static_cast<float>(op->get_alpha())});
}
// Creates ngraph v0::Sigmoid as a clDNN logistic activation without extra parameters.
void CreateSigmoidOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sigmoid>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::logistic, {});
}
// Creates ngraph v0::Relu as a clDNN relu activation without extra parameters.
void CreateReluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Relu>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::relu, {});
}
// Creates ngraph v0::PRelu as a clDNN relu_negative_slope activation.
//
// Two slope layouts are supported:
//  - a constant slope with exactly one element: the value is passed as an
//    activation parameter;
//  - a slope whose element count equals dimension 1 of the output shape:
//    the slope input is wired into the activation primitive.
// Any other slope shape raises an IE exception instead of silently creating no
// primitive for the op.
void CreatePReluOp(Program& p, const std::shared_ptr<ngraph::op::v0::PRelu>& op) {
    p.ValidateInputs(op, {2});

    auto slope_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    auto slope_shape = op->get_input_shape(1);
    auto out_shape = op->get_output_shape(0);

    if (slope_node && ngraph::shape_size(slope_shape) == 1) {
        float slope;
        if (!ngraph::op::util::get_single_value(slope_node, slope))
            THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        CreateUnaryEltwiseOp(p, op, cldnn::activation_func::relu_negative_slope, {slope});
    } else if (out_shape.size() >= 2 && ngraph::shape_size(slope_shape) == out_shape[1]) {
        auto inputs = p.GetInputPrimitiveIDs(op);
        std::string layerName = layer_type_name_ID(op);
        auto activationPrimitive = cldnn::activation(layerName, inputs[0], inputs[1], cldnn::activation_func::relu_negative_slope);
        p.AddPrimitive(activationPrimitive);
        p.AddPrimitiveToProfiler(op);
    } else {
        // Same message as the corresponding branch of CreateSeluOp.
        THROW_IE_EXCEPTION << "Unsupported shapes of parameter nodes in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }
}
// Creates ngraph v0::Clamp as a clDNN clamp activation; the op's min and max
// (converted to float) are passed as the two additional parameters.
void CreateClampOp(Program& p, const std::shared_ptr<ngraph::op::v0::Clamp>& op) {
    const float lowerBound = static_cast<float>(op->get_min());
    const float upperBound = static_cast<float>(op->get_max());
    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::clamp, {lowerBound, upperBound});
}
// Creates ngraph v0::Exp as a clDNN exp activation without extra parameters.
void CreateExpOp(Program& p, const std::shared_ptr<ngraph::op::v0::Exp>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::exp, {});
}
// Creates ngraph v1::LogicalNot as a clDNN negation activation without extra parameters.
void CreateLogicalNotOp(Program& p, const std::shared_ptr<ngraph::op::v1::LogicalNot>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::negation, {});
}
// Creates ngraph v0::Asin as a clDNN asin activation without extra parameters.
void CreateAsinOp(Program& p, const std::shared_ptr<ngraph::op::v0::Asin>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::asin, {});
}
// Creates ngraph v3::Asinh as a clDNN asinh activation without extra parameters.
void CreateAsinhOp(Program& p, const std::shared_ptr<ngraph::op::v3::Asinh>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::asinh, {});
}
// Creates ngraph v0::Acos as a clDNN acos activation without extra parameters.
void CreateAcosOp(Program& p, const std::shared_ptr<ngraph::op::v0::Acos>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::acos, {});
}
// Creates ngraph v3::Acosh as a clDNN acosh activation without extra parameters.
void CreateAcoshOp(Program& p, const std::shared_ptr<ngraph::op::v3::Acosh>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::acosh, {});
}
// Creates ngraph v0::Atan as a clDNN atan activation without extra parameters.
void CreateAtanOp(Program& p, const std::shared_ptr<ngraph::op::v0::Atan>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::atan, {});
}
// Creates ngraph v3::Atanh as a clDNN atanh activation without extra parameters.
void CreateAtanhOp(Program& p, const std::shared_ptr<ngraph::op::v3::Atanh>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::atanh, {});
}
// Creates ngraph v0::Abs as a clDNN abs activation without extra parameters.
void CreateAbsOp(Program& p, const std::shared_ptr<ngraph::op::v0::Abs>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::abs, {});
}
// Creates ngraph v0::Floor as a clDNN floor activation without extra parameters.
void CreateFloorOp(Program& p, const std::shared_ptr<ngraph::op::v0::Floor>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::floor, {});
}
// Creates ngraph v0::Ceiling as a clDNN ceil activation without extra parameters.
void CreateCeilingOp(Program& p, const std::shared_ptr<ngraph::op::v0::Ceiling>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::ceil, {});
}
// Creates ngraph v0::Sqrt as a clDNN sqrt activation without extra parameters.
void CreateSqrtOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sqrt>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sqrt, {});
}
// Creates ngraph v0::Erf as a clDNN erf activation without extra parameters.
void CreateErfOp(Program& p, const std::shared_ptr<ngraph::op::v0::Erf>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::erf, {});
}
// Creates ngraph v0::HardSigmoid as a clDNN hard_sigmoid activation.
//
// Inputs 1 (alpha) and 2 (beta) must be constants holding exactly one element each;
// their values are passed as the two additional activation parameters.
// Raises an IE exception when either input is not a constant, when either has more
// than one element (previously this case silently created no primitive), or when a
// value cannot be read.
void CreateHardSigmoidOp(Program& p, const std::shared_ptr<ngraph::op::v0::HardSigmoid>& op) {
    p.ValidateInputs(op, {3});
    auto alpha_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    auto beta_node = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
    if (!alpha_node || !beta_node) {
        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    if (ngraph::shape_size(alpha_node->get_output_shape(0)) == 1 &&
        ngraph::shape_size(beta_node->get_output_shape(0)) == 1)  {
        float alpha, beta;
        if (!ngraph::op::util::get_single_value(alpha_node, alpha) || !ngraph::op::util::get_single_value(beta_node, beta)) {
            THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
        }
        CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hard_sigmoid, {alpha, beta});
    } else {
        // Same message as the corresponding branch of CreateSeluOp.
        THROW_IE_EXCEPTION << "Unsupported shapes of parameter nodes in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }
}
// Creates ngraph v0::Log as a clDNN log activation without extra parameters.
void CreateLogOp(Program& p, const std::shared_ptr<ngraph::op::v0::Log>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::log, {});
}
// Creates ngraph v0::Negative as a clDNN negative activation without extra parameters.
void CreateNegativeOp(Program& p, const std::shared_ptr<ngraph::op::v0::Negative>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::negative, {});
}
// Creates ngraph v0::Selu as a clDNN selu activation.
// Inputs 1 (alpha) and 2 (lambda) must be single-element constants; their values are
// passed as the two additional activation parameters. Non-constant inputs, inputs with
// more than one element, or unreadable values raise an IE exception.
void CreateSeluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Selu>& op) {
    p.ValidateInputs(op, {3});

    auto alphaConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    auto lambdaConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(2));
    if (!alphaConst || !lambdaConst) {
        THROW_IE_EXCEPTION << "Unsupported parameter nodes type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    const bool singleElementParams = ngraph::shape_size(alphaConst->get_output_shape(0)) == 1 &&
                                     ngraph::shape_size(lambdaConst->get_output_shape(0)) == 1;
    if (!singleElementParams) {
        THROW_IE_EXCEPTION << "Unsupported shapes of parameter nodes in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    float alpha, lambda;
    if (!ngraph::op::util::get_single_value(alphaConst, alpha) || !ngraph::op::util::get_single_value(lambdaConst, lambda)) {
        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::selu, {alpha, lambda});
}
// Creates ngraph v4::SoftPlus as a clDNN softplus activation without extra parameters.
void CreateSoftPlusOp(Program& p, const std::shared_ptr<ngraph::op::v4::SoftPlus>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::softplus, {});
}
// Creates ngraph v0::Tan as a clDNN tan activation without extra parameters.
void CreateTanOp(Program& p, const std::shared_ptr<ngraph::op::v0::Tan>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::tan, {});
}
// Creates ngraph v0::Sin as a clDNN sin activation without extra parameters.
void CreateSinOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sin>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sin, {});
}
// Creates ngraph v0::Sinh as a clDNN sinh activation without extra parameters.
void CreateSinhOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sinh>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sinh, {});
}
// Creates ngraph v0::Cos as a clDNN cos activation without extra parameters.
void CreateCosOp(Program& p, const std::shared_ptr<ngraph::op::v0::Cos>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::cos, {});
}
// Creates ngraph v0::Cosh as a clDNN cosh activation without extra parameters.
void CreateCoshOp(Program& p, const std::shared_ptr<ngraph::op::v0::Cosh>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::cosh, {});
}
// Creates ngraph v4::Swish as a clDNN swish activation.
// With a single input the additional parameter is 1.0f. With two inputs the second
// (beta) must be a single-element constant whose value becomes the parameter;
// a non-constant beta, a beta with more than one element, or an unreadable value
// raises an IE exception.
void CreateSwishOp(Program& p, const std::shared_ptr<ngraph::op::v4::Swish>& op) {
    p.ValidateInputs(op, {1, 2});

    // No beta input: use the fixed parameter value.
    if (op->get_input_size() != 2) {
        CreateUnaryEltwiseOp(p, op, cldnn::activation_func::swish, {1.0f});
        return;
    }

    auto betaConst = std::dynamic_pointer_cast<ngraph::op::v0::Constant>(op->get_input_node_shared_ptr(1));
    if (!betaConst) {
        THROW_IE_EXCEPTION << "Unsupported parameter type in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    if (ngraph::shape_size(betaConst->get_output_shape(0)) != 1) {
        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    float beta;
    if (!ngraph::op::util::get_single_value(betaConst, beta)) {
        THROW_IE_EXCEPTION << "Unsupported parameter size in " << op->get_friendly_name() << " (" << op->get_type_name() << ")";
    }

    CreateUnaryEltwiseOp(p, op, cldnn::activation_func::swish, {beta});
}
// Creates ngraph v4::HSwish as a clDNN hswish activation without extra parameters.
void CreateHSwishOp(Program& p, const std::shared_ptr<ngraph::op::v4::HSwish>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hswish, {});
}
// Creates ngraph v4::Mish as a clDNN mish activation without extra parameters.
void CreateMishOp(Program& p, const std::shared_ptr<ngraph::op::v4::Mish>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::mish, {});
}
// Creates ngraph v0::Gelu as a clDNN gelu activation without extra parameters.
void CreateGeluOp(Program& p, const std::shared_ptr<ngraph::op::v0::Gelu>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::gelu, {});
}
// Creates ngraph v0::Sign as a clDNN sign activation without extra parameters.
void CreateSignOp(Program& p, const std::shared_ptr<ngraph::op::v0::Sign>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::sign, {});
}
// Creates ngraph v5::HSigmoid as a clDNN hsigmoid activation without extra parameters.
void CreateHSigmoidOp(Program& p, const std::shared_ptr<ngraph::op::v5::HSigmoid>& op) {
CreateUnaryEltwiseOp(p, op, cldnn::activation_func::hsigmoid, {});
}
// Creates ngraph v5::Round as a clDNN activation: HALF_TO_EVEN maps to
// round_half_to_even and HALF_AWAY_FROM_ZERO to round_half_away_from_zero.
// Any other rounding mode raises an IE exception.
void CreateRoundOp(Program& p, const std::shared_ptr<ngraph::op::v5::Round>& op) {
    using RoundMode = ngraph::op::v5::Round::RoundMode;

    const auto mode = op->get_mode();
    auto func = cldnn::activation_func::none;
    if (mode == RoundMode::HALF_TO_EVEN) {
        func = cldnn::activation_func::round_half_to_even;
    } else if (mode == RoundMode::HALF_AWAY_FROM_ZERO) {
        func = cldnn::activation_func::round_half_away_from_zero;
    } else {
        THROW_IE_EXCEPTION << "Unsupported round mode in " << op->get_friendly_name() << ": " << static_cast<int>(op->get_mode());
    }

    CreateUnaryEltwiseOp(p, op, func, {});
}
// Factory registrations: one REGISTER_FACTORY_IMPL(<opset version>, <op name>) entry
// per supported op.
// NOTE(review): the macro is defined elsewhere; presumably it binds
// ngraph::op::<version>::<Op> to the matching Create<Op>Op function — confirm
// against the macro definition before adding or reordering entries.
REGISTER_FACTORY_IMPL(v0, Tanh);
REGISTER_FACTORY_IMPL(v0, Elu);
REGISTER_FACTORY_IMPL(v0, Sigmoid);
REGISTER_FACTORY_IMPL(v0, Relu);
REGISTER_FACTORY_IMPL(v0, PRelu);
REGISTER_FACTORY_IMPL(v0, Clamp);
REGISTER_FACTORY_IMPL(v0, Exp);
REGISTER_FACTORY_IMPL(v1, LogicalNot);
REGISTER_FACTORY_IMPL(v0, Asin);
REGISTER_FACTORY_IMPL(v3, Asinh);
REGISTER_FACTORY_IMPL(v0, Acos);
REGISTER_FACTORY_IMPL(v3, Acosh);
REGISTER_FACTORY_IMPL(v0, Atan);
REGISTER_FACTORY_IMPL(v3, Atanh);
REGISTER_FACTORY_IMPL(v0, Abs);
REGISTER_FACTORY_IMPL(v0, Floor);
REGISTER_FACTORY_IMPL(v0, Ceiling);
REGISTER_FACTORY_IMPL(v0, Sqrt);
REGISTER_FACTORY_IMPL(v0, Erf);
REGISTER_FACTORY_IMPL(v0, HardSigmoid);
REGISTER_FACTORY_IMPL(v0, Log);
REGISTER_FACTORY_IMPL(v0, Negative);
REGISTER_FACTORY_IMPL(v0, Selu);
REGISTER_FACTORY_IMPL(v4, SoftPlus);
REGISTER_FACTORY_IMPL(v0, Tan);
REGISTER_FACTORY_IMPL(v0, Sin);
REGISTER_FACTORY_IMPL(v0, Sinh);
REGISTER_FACTORY_IMPL(v0, Cos);
REGISTER_FACTORY_IMPL(v0, Cosh);
REGISTER_FACTORY_IMPL(v4, Swish);
REGISTER_FACTORY_IMPL(v4, HSwish);
REGISTER_FACTORY_IMPL(v4, Mish);
REGISTER_FACTORY_IMPL(v0, Gelu);
REGISTER_FACTORY_IMPL(v0, Sign);
REGISTER_FACTORY_IMPL(v5, HSigmoid);
REGISTER_FACTORY_IMPL(v5, Round);
} // namespace CLDNNPlugin

View File

@ -0,0 +1,59 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
#include <transformations_visibility.hpp>
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph {
namespace op {
namespace internal {
// Internal NonMaxSuppression operation taking either five inputs
// (boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold)
// or six (the same plus soft_nms_sigma). Box encoding is carried as an integer
// flag and the element type of the index outputs is configurable.
class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op {
public:
// Type name "NonMaxSuppressionIEInternal", version 0.
static constexpr NodeTypeInfo type_info{"NonMaxSuppressionIEInternal", 0};
const NodeTypeInfo& get_type_info() const override { return type_info; }
// Five-input constructor (no soft_nms_sigma input).
// output_type defaults to i64.
NonMaxSuppressionIEInternal(const Output<Node>& boxes,
const Output<Node>& scores,
const Output<Node>& max_output_boxes_per_class,
const Output<Node>& iou_threshold,
const Output<Node>& score_threshold,
int center_point_box,
bool sort_result_descending,
const ngraph::element::Type& output_type = ngraph::element::i64);
// Six-input constructor; identical to the above with an extra soft_nms_sigma input.
NonMaxSuppressionIEInternal(const Output<Node>& boxes,
const Output<Node>& scores,
const Output<Node>& max_output_boxes_per_class,
const Output<Node>& iou_threshold,
const Output<Node>& score_threshold,
const Output<Node>& soft_nms_sigma,
int center_point_box,
bool sort_result_descending,
const ngraph::element::Type& output_type = ngraph::element::i64);
void validate_and_infer_types() override;
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector & new_args) const override;
// Box encoding flag supplied by the caller.
// NOTE(review): presumably 0 = corner encoding, 1 = center encoding — confirm with the producers of this op.
int m_center_point_box;
// Whether results are sorted in descending order; defaults to true.
bool m_sort_result_descending = true;
// Element type requested for the index-typed outputs.
element::Type m_output_type;
private:
// Reads max_output_boxes_per_class from its input (used during shape inference).
int64_t max_boxes_output_from_input() const;
};
} // namespace internal
} // namespace op
} // namespace ngraph

View File

@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <utility>
#include <memory>
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
namespace ngraph {
namespace pass {
// Forward declaration carrying the export attribute; the class is defined below.
class TRANSFORMATIONS_API ConvertNMSToNMSIEInternal;
} // namespace pass
} // namespace ngraph
// Matcher pass whose constructor registers the NonMaxSuppression ->
// NonMaxSuppressionIEInternal conversion (matcher named "ConvertNMSToNMSIEInternal").
class ngraph::pass::ConvertNMSToNMSIEInternal: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ConvertNMSToNMSIEInternal();
};

View File

@ -0,0 +1,106 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <ngraph/opsets/opset5.hpp>
#include "ngraph_ops/nms_ie_internal.hpp"
using namespace std;
using namespace ngraph;
// Out-of-class definition of the static constexpr member declared in the class;
// required before C++17 whenever type_info is odr-used (for example when it is
// bound to a reference).
constexpr NodeTypeInfo op::internal::NonMaxSuppressionIEInternal::type_info;
// Five-input constructor (no soft_nms_sigma). Passes the inputs to Op, stores
// the three attributes and then runs constructor_validate_and_infer_types().
op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(
        const Output<Node>& boxes,
        const Output<Node>& scores,
        const Output<Node>& max_output_boxes_per_class,
        const Output<Node>& iou_threshold,
        const Output<Node>& score_threshold,
        int center_point_box,
        bool sort_result_descending,
        const ngraph::element::Type& output_type)
    : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}),
      m_center_point_box(center_point_box),
      m_sort_result_descending(sort_result_descending),
      m_output_type(output_type) {
    constructor_validate_and_infer_types();
}
// Six-input constructor: same as the five-input form with soft_nms_sigma
// appended as the last input. Stores the three attributes and then runs
// constructor_validate_and_infer_types().
op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(
        const Output<Node>& boxes,
        const Output<Node>& scores,
        const Output<Node>& max_output_boxes_per_class,
        const Output<Node>& iou_threshold,
        const Output<Node>& score_threshold,
        const Output<Node>& soft_nms_sigma,
        int center_point_box,
        bool sort_result_descending,
        const ngraph::element::Type& output_type)
    : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}),
      m_center_point_box(center_point_box),
      m_sort_result_descending(sort_result_descending),
      m_output_type(output_type) {
    constructor_validate_and_infer_types();
}
// Builds a copy of this node on top of new_args, keeping the current attributes.
// Five arguments select the constructor without soft_nms_sigma, six the one
// with it; any other count throws ngraph_error.
std::shared_ptr<Node> op::internal::NonMaxSuppressionIEInternal::clone_with_new_inputs(const ngraph::OutputVector &new_args) const {
    switch (new_args.size()) {
    case 6:
        return make_shared<NonMaxSuppressionIEInternal>(
            new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4), new_args.at(5),
            m_center_point_box, m_sort_result_descending, m_output_type);
    case 5:
        return make_shared<NonMaxSuppressionIEInternal>(
            new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4),
            m_center_point_box, m_sort_result_descending, m_output_type);
    default:
        throw ngraph::ngraph_error("Unsupported number of inputs: " + std::to_string(new_args.size()));
    }
}
// Exposes the three attributes of the op to the visitor under the names
// "center_point_box", "sort_result_descending" and "output_type".
// Always returns true.
bool op::internal::NonMaxSuppressionIEInternal::visit_attributes(AttributeVisitor& visitor) {
    visitor.on_attribute("center_point_box", m_center_point_box);
    visitor.on_attribute("sort_result_descending", m_sort_result_descending);
    visitor.on_attribute("output_type", m_output_type);

    return true;
}
// Input port indices of the first three inputs, in constructor argument order.
static constexpr size_t boxes_port = 0;
static constexpr size_t scores_port = 1;
static constexpr size_t max_output_boxes_per_class_port = 2;
int64_t op::internal::NonMaxSuppressionIEInternal::max_boxes_output_from_input() const {
int64_t max_output_boxes{0};
size_t num_of_inputs = inputs().size();
if (num_of_inputs < 3) {
return 0;
}
const auto max_output_boxes_input =
as_type_ptr<op::Constant>(input_value(max_output_boxes_per_class_port).get_node_shared_ptr());
max_output_boxes = max_output_boxes_input->cast_vector<int64_t>().at(0);
return max_output_boxes;
}
void op::internal::NonMaxSuppressionIEInternal::validate_and_infer_types() {
const auto boxes_ps = get_input_partial_shape(boxes_port);
const auto scores_ps = get_input_partial_shape(scores_port);
// NonMaxSuppression produces triplets
// that have the following format: [batch_index, class_index, box_index]
PartialShape out_shape = {Dimension::dynamic(), 3};
if (boxes_ps.rank().is_static() && scores_ps.rank().is_static()) {
const auto num_boxes_boxes = boxes_ps[1];
const auto max_output_boxes_per_class_node = input_value(max_output_boxes_per_class_port).get_node_shared_ptr();
if (num_boxes_boxes.is_static() && scores_ps[0].is_static() && scores_ps[1].is_static() &&
op::is_constant(max_output_boxes_per_class_node)) {
const auto num_boxes = num_boxes_boxes.get_length();
const auto num_classes = scores_ps[1].get_length();
const auto max_output_boxes_per_class = max_boxes_output_from_input();
out_shape[0] = std::min(num_boxes, max_output_boxes_per_class) * num_classes *
scores_ps[0].get_length();
}
}
set_output_type(0, m_output_type, out_shape);
set_output_type(1, element::f32, out_shape);
set_output_type(2, m_output_type, Shape{1});
}

View File

@ -0,0 +1,123 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <memory>
#include <vector>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include "ngraph_ops/nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp"
NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertNMSToNMSIEInternal, "ConvertNMSToNMSIEInternal", 0);
// Registers a matcher that replaces every opset5::NonMaxSuppression with
// op::internal::NonMaxSuppressionIEInternal.
//
// For each match the callback:
//   * substitutes scalar zero constants for missing inputs 2..4,
//   * reshapes inputs 2..4 (and input 5 when soft NMS is active) to shape [1],
//   * maps the box encoding to the integer center_point_box flag
//     (CENTER -> 1, CORNER -> 0; anything else throws ngraph_error),
//   * creates the internal op with i32 index outputs and appends Convert nodes
//     on outputs 0 and 2 when the original element types differ,
//   * copies the runtime info and the friendly name, then replaces the node.
ngraph::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() {
    auto nms = ngraph::pattern::wrap_type<ngraph::opset5::NonMaxSuppression>();

    ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) {
        auto nms_5 = std::dynamic_pointer_cast<ngraph::opset5::NonMaxSuppression>(m.get_match_root());
        if (!nms_5) {
            return false;
        }

        const auto new_args = nms_5->input_values();
        const std::size_t num_of_inputs = new_args.size();

        // Inputs 2..4 are optional; fall back to scalar zero constants.
        const auto& arg2 = num_of_inputs > 2 ? new_args.at(2) : ngraph::opset5::Constant::create(element::i32, Shape{}, {0});
        const auto& arg3 = num_of_inputs > 3 ? new_args.at(3) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f});
        const auto& arg4 = num_of_inputs > 4 ? new_args.at(4) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f});

        // vector of new nGraph operations
        NodeVector new_ops;

        // Each Reshape gets its own target-shape constant holding the value 1.
        Output<Node> new_shape_for_max_per_class = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
        Output<Node> new_shape_for_iou_threshold = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
        Output<Node> new_shape_for_score_threshold = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});

        Output<Node> new_max_per_class = std::make_shared<opset1::Reshape>(arg2, new_shape_for_max_per_class, true);
        new_ops.emplace_back(new_max_per_class.get_node_shared_ptr());

        Output<Node> new_iou_threshold = std::make_shared<opset1::Reshape>(arg3, new_shape_for_iou_threshold, true);
        new_ops.emplace_back(new_iou_threshold.get_node_shared_ptr());

        Output<Node> new_score_threshold = std::make_shared<opset1::Reshape>(arg4, new_shape_for_score_threshold, true);
        new_ops.emplace_back(new_score_threshold.get_node_shared_ptr());

        int center_point_box = 0;
        switch (nms_5->get_box_encoding()) {
            case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER:
                center_point_box = 1;
                break;
            case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER:
                center_point_box = 0;
                break;
            default:
                throw ngraph_error("NonMaxSuppression layer " + nms_5->get_friendly_name() +
                                   " has unsupported box encoding");
        }

        std::shared_ptr<op::internal::NonMaxSuppressionIEInternal> nms_legacy{nullptr};
        if (num_of_inputs > 5 && nms_5->soft_nms_sigma_from_input() != 0.0f) {
            // Soft NMS is active: forward sigma as a sixth input reshaped to [1].
            // The shape constant is only created when this branch is taken.
            Output<Node> new_shape_for_soft_nms_sigma = opset1::Constant::create(ngraph::element::i64, Shape{1}, {1});
            Output<Node> new_soft_nms_sigma =
                std::make_shared<opset1::Reshape>(new_args.at(5), new_shape_for_soft_nms_sigma, true);
            new_ops.emplace_back(new_soft_nms_sigma.get_node_shared_ptr());

            nms_legacy = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(
                    new_args.at(0),
                    new_args.at(1),
                    new_max_per_class,
                    new_iou_threshold,
                    new_score_threshold,
                    new_soft_nms_sigma,
                    center_point_box,
                    nms_5->get_sort_result_descending(),
                    element::i32);
        } else {
            nms_legacy = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(
                    new_args.at(0),
                    new_args.at(1),
                    new_max_per_class,
                    new_iou_threshold,
                    new_score_threshold,
                    center_point_box,
                    nms_5->get_sort_result_descending(),
                    element::i32);
        }
        new_ops.push_back(nms_legacy);

        // The internal op emits i32 indices; convert back when the original
        // outputs 0 / 2 use a different element type.
        Output<Node> output_0 = nms_legacy->output(0);
        if (nms_5->output(0).get_element_type() != output_0.get_element_type()) {
            output_0 = std::make_shared<opset1::Convert>(output_0, nms_5->output(0).get_element_type());
            output_0.get_node_shared_ptr()->set_friendly_name(nms_5->get_friendly_name() + "/convert.0");
            new_ops.emplace_back(output_0.get_node_shared_ptr());
        }

        Output<Node> output_2 = nms_legacy->output(2);
        if (nms_5->output(2).get_element_type() != output_2.get_element_type()) {
            output_2 = std::make_shared<opset1::Convert>(output_2, nms_5->output(2).get_element_type());
            output_2.get_node_shared_ptr()->set_friendly_name(nms_5->get_friendly_name() + "/convert.2");
            new_ops.emplace_back(output_2.get_node_shared_ptr());
        }

        nms_legacy->set_friendly_name(nms_5->get_friendly_name());
        ngraph::copy_runtime_info(nms_5, new_ops);
        ngraph::replace_node(nms_5, {output_0, nms_legacy->output(1), output_2});
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(nms, "ConvertNMSToNMSIEInternal");
    this->register_matcher(m, callback);
}

View File

@ -0,0 +1,192 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <queue>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_5.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph_ops/nms_ie_internal.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include <ngraph/pass/manager.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
// opset1 NonMaxSuppression, run through ConvertNMS1ToNMS5 and
// ConvertNMSToNMSIEInternal (plus constant folding), must match a hand-built
// NonMaxSuppressionIEInternal with i32 outputs followed by a Convert of
// output 0 to i64. Both graphs must end up with a static output shape.
TEST(TransformationTests, ConvertNMS1ToNMSIEInternal) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
        auto nms = std::make_shared<opset1::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, op::v1::NonMaxSuppression::BoxEncodingType::CORNER, true);

        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});

        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::InitNodeInfo>();
        manager.register_pass<ngraph::pass::ConvertNMS1ToNMS5>();
        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
        manager.register_pass<ngraph::pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
    }

    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i64, Shape{1}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, 0, true, element::i32);
        // The original op produced i64 indices, so the transformation appends a Convert.
        auto convert = std::make_shared<opset1::Convert>(nms->output(0), element::i64);

        f_ref = std::make_shared<Function>(NodeVector{convert}, ParameterVector{boxes, scores});
        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
    }

    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
// opset3 NonMaxSuppression (i32 output type), run through ConvertNMS3ToNMS5
// and ConvertNMSToNMSIEInternal (plus constant folding), must match a
// hand-built NonMaxSuppressionIEInternal with i32 outputs. Both graphs must
// end up with a static output shape.
TEST(TransformationTests, ConvertNMS3ToNMSIEInternal) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
        auto nms = std::make_shared<opset3::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, opset3::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);

        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});

        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::InitNodeInfo>();
        manager.register_pass<ngraph::pass::ConvertNMS3ToNMS5>();
        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
        manager.register_pass<ngraph::pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
    }

    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{1}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, 0, true, element::i32);

        f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
    }

    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
// opset4 NonMaxSuppression (i32 output type), run through ConvertNMS4ToNMS5
// and ConvertNMSToNMSIEInternal (plus constant folding), must match a
// hand-built NonMaxSuppressionIEInternal with i32 outputs. Both graphs must
// end up with a static output shape.
TEST(TransformationTests, ConvertNMS4ToNMSIEInternal) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
        auto nms = std::make_shared<opset4::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, opset4::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);

        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});

        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::InitNodeInfo>();
        manager.register_pass<ngraph::pass::ConvertNMS4ToNMS5>();
        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
        manager.register_pass<ngraph::pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
    }

    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{1}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, 0, true, element::i32);

        f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
    }

    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}
// opset5 NonMaxSuppression with a non-zero soft_nms_sigma (0.5), run through
// ConvertNMSToNMSIEInternal (plus constant folding), must match a hand-built
// six-input NonMaxSuppressionIEInternal with i32 outputs. Both graphs must
// end up with a static output shape.
TEST(TransformationTests, ConvertNMS5ToNMSIEInternal) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{}, {0.7});
        auto soft_nms_sigma = opset1::Constant::create(element::f32, Shape{}, {0.5});
        auto nms = std::make_shared<opset5::NonMaxSuppression>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, soft_nms_sigma, opset5::NonMaxSuppression::BoxEncodingType::CORNER, true, element::i32);

        f = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});

        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::InitNodeInfo>();
        manager.register_pass<ngraph::pass::ConvertNMSToNMSIEInternal>();
        manager.register_pass<ngraph::pass::ConstantFolding>();
        manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
        ASSERT_TRUE(f->get_output_partial_shape(0).is_static()) << "Shape " << f->get_output_partial_shape(0) << " should be static";
    }

    {
        auto boxes = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1000, 4});
        auto scores = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 1000});
        auto max_output_boxes_per_class = opset1::Constant::create(element::i32, Shape{1}, {10});
        auto iou_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.75});
        auto score_threshold = opset1::Constant::create(element::f32, Shape{1}, {0.7});
        auto soft_nms_sigma = opset1::Constant::create(element::f32, Shape{1}, {0.5});
        auto nms = std::make_shared<op::internal::NonMaxSuppressionIEInternal>(boxes, scores, max_output_boxes_per_class,
                iou_threshold, score_threshold, soft_nms_sigma, 0, true, element::i32);

        f_ref = std::make_shared<Function>(NodeVector{nms}, ParameterVector{boxes, scores});
        ASSERT_TRUE(f_ref->get_output_partial_shape(0).is_static()) << "Shape " << f_ref->get_output_partial_shape(0) << " should be static";
    }

    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}

View File

@ -23,15 +23,7 @@ TEST_P(CoreThreadingTestsWithIterations, smoke_LoadNetwork_RemoteContext) {
InferenceEngine::Core ie;
std::atomic<unsigned int> counter{0u};
const FuncTestUtils::TestModel::TestModel models[] = {
FuncTestUtils::TestModel::convReluNormPoolFcModelFP32,
FuncTestUtils::TestModel::convReluNormPoolFcModelFP16
};
std::vector<InferenceEngine::CNNNetwork> networks;
for (auto & model : models) {
networks.emplace_back(ie.ReadNetwork(model.model_xml_str, model.weights_blob));
}
networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::make2InputSubtract()));
networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeMultiSingleConv()));
networks.emplace_back(InferenceEngine::CNNNetwork(ngraph::builder::subgraph::makeSingleConv()));

View File

@ -9,7 +9,13 @@
using namespace LayerTestsDefinitions;
using namespace ngraph::helpers;
namespace {
// Common params
// Candidate input precisions: FP32, FP16, I16 and U8.
// NOTE(review): no use of this list is visible in the surrounding hunks —
// confirm it is consumed by a test instantiation.
const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::I16,
InferenceEngine::Precision::U8
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
@ -46,15 +52,26 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes
{HSwish, {}},
{SoftPlus, {}},
{HSigmoid, {}},
{Swish, {{0.5f}}},
{RoundHalfToEven, {}},
{RoundHalfAwayFromZero, {}}
};
// Activations exercised by the PReLU-style suites, each with one list of
// constant values (-0.01f for PReLu, 0.01f for LeakyRelu).
const std::map<ActivationTypes, std::vector<std::vector<float>>> activationParamTypes = {
{PReLu, {{-0.01f}}},
{LeakyRelu, {{0.01f}}}
};
// Shape table with a single empty second shape per first shape.
// NOTE(review): presumably key = input shape, value = candidate parameter
// shapes expanded by combineParams — confirm against the test fixture.
std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> basic = {
{{1, 50}, {{}}},
{{1, 128}, {{}}},
};
// Same layout as `basic`, but each first shape is paired with the second
// shapes {1} and {last dimension of the first shape}.
std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> preluBasic = {
{{1, 50}, {{1}, {50}}},
{{1, 128}, {{1}, {128}}},
};
const auto basicCases = ::testing::Combine(
::testing::ValuesIn(CommonTestUtils::combineParams(activationTypes)),
::testing::ValuesIn(netPrecisions),
@ -66,6 +83,21 @@ const auto basicCases = ::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
// Parameter generator for the PReLU-style suites: activationParamTypes x
// netPrecisions x preluBasic shapes on the GPU device, with UNSPECIFIED
// precisions and ANY layouts in the remaining four positions.
const auto basicPreluCases = ::testing::Combine(
::testing::ValuesIn(CommonTestUtils::combineParams(activationParamTypes)),
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(CommonTestUtils::combineParams(preluBasic)),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationLayerTest, basicCases, ActivationLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic_Prelu, ActivationLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName);
// NOTE(review): this instantiation reuses the "smoke_Activation_Basic" prefix and
// ActivationLayerTest::getTestCaseName for ActivationParamLayerTest — confirm
// both are intentional.
INSTANTIATE_TEST_CASE_P(smoke_Activation_Basic, ActivationParamLayerTest, basicPreluCases, ActivationLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,174 @@
// Copyright (C) 2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/broadcast.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Input precisions shared by every broadcast suite below: FP32, I32 and BOOL.
const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::I32,
InferenceEngine::Precision::BOOL
};
// NUMPY MODE
// inShapesNumpy[i] is broadcast to targetShapesNumpy[i]; the two lists are
// index-aligned and each pair gets its own suite instantiation.
std::vector<std::vector<size_t>> inShapesNumpy = {
{3, 1},
{1, 4, 1}
};
std::vector<std::vector<size_t>> targetShapesNumpy = {
{2, 3, 6},
{1, 4, 4}
};
// Pair 0: {3, 1} -> {2, 3, 6}, for every input precision.
const auto numpyBroadcastParams1 = ::testing::Combine(
::testing::Values(targetShapesNumpy[0]),
::testing::Values(ngraph::AxisSet{}), //not used in numpy mode
::testing::Values(ngraph::op::BroadcastType::NUMPY),
::testing::Values(inShapesNumpy[0]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestNumpyBroadcast1,
BroadcastLayerTest,
numpyBroadcastParams1,
BroadcastLayerTest::getTestCaseName
);
// Pair 1: {1, 4, 1} -> {1, 4, 4}, for every input precision.
const auto numpyBroadcastParams2 = ::testing::Combine(
::testing::Values(targetShapesNumpy[1]),
::testing::Values(ngraph::AxisSet{}), //not used in numpy mode
::testing::Values(ngraph::op::BroadcastType::NUMPY),
::testing::Values(inShapesNumpy[1]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestNumpyBroadcast2,
BroadcastLayerTest,
numpyBroadcastParams2,
BroadcastLayerTest::getTestCaseName
);
// BIDIRECTIONAL MODE
// inShapesBidi[i] is paired with targetShapesBidi[i]; the two lists are
// index-aligned and each pair gets its own suite instantiation.
std::vector<std::vector<size_t>> inShapesBidi = {
{4, 1},
{1, 4, 1},
{4, 1, 1}
};
std::vector<std::vector<size_t>> targetShapesBidi = {
{2, 1, 4},
{1, 4, 4},
{1, 1, 2, 2}
};
// Pair 0: input {4, 1}, target {2, 1, 4}.
const auto bidirectionalBroadcastParams1 = ::testing::Combine(
::testing::Values(targetShapesBidi[0]),
::testing::Values(ngraph::AxisSet{}), //not used in bidirectional mode
::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL),
::testing::Values(inShapesBidi[0]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestBidirectionalBroadcast1,
BroadcastLayerTest,
bidirectionalBroadcastParams1,
BroadcastLayerTest::getTestCaseName
);
// Pair 1: input {1, 4, 1}, target {1, 4, 4}.
const auto bidirectionalBroadcastParams2 = ::testing::Combine(
::testing::Values(targetShapesBidi[1]),
::testing::Values(ngraph::AxisSet{}), //not used in bidirectional mode
::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL),
::testing::Values(inShapesBidi[1]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestBidirectionalBroadcast2,
BroadcastLayerTest,
bidirectionalBroadcastParams2,
BroadcastLayerTest::getTestCaseName
);
// Pair 2: input {4, 1, 1}, target {1, 1, 2, 2}.
const auto bidirectionalBroadcastParams3 = ::testing::Combine(
::testing::Values(targetShapesBidi[2]),
::testing::Values(ngraph::AxisSet{}), //not used in bidirectional mode
::testing::Values(ngraph::op::BroadcastType::BIDIRECTIONAL),
::testing::Values(inShapesBidi[2]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestBidirectionalBroadcast3,
BroadcastLayerTest,
bidirectionalBroadcastParams3,
BroadcastLayerTest::getTestCaseName
);
// EXPLICIT MODE
// inShapesExplicit[i], targetShapesExplicit[i] and axes[i] are index-aligned;
// unlike the other modes, the axis set is passed to the test here.
std::vector<std::vector<size_t>> inShapesExplicit = {
{3, 1},
{2, 4}
};
std::vector<std::vector<size_t>> targetShapesExplicit = {
{2, 3, 1},
{2, 3, 4}
};
std::vector<ngraph::AxisSet> axes = {
{1, 2},
{0, 2}
};
// Triple 0: input {3, 1}, target {2, 3, 1}, axes {1, 2}.
const auto explicitBroadcastParams1 = ::testing::Combine(
::testing::Values(targetShapesExplicit[0]),
::testing::Values(axes[0]),
::testing::Values(ngraph::op::BroadcastType::EXPLICIT),
::testing::Values(inShapesExplicit[0]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestExplicitBroadcast1,
BroadcastLayerTest,
explicitBroadcastParams1,
BroadcastLayerTest::getTestCaseName
);
// Triple 1: input {2, 4}, target {2, 3, 4}, axes {0, 2}.
const auto explicitBroadcastParams2 = ::testing::Combine(
::testing::Values(targetShapesExplicit[1]),
::testing::Values(axes[1]),
::testing::Values(ngraph::op::BroadcastType::EXPLICIT),
::testing::Values(inShapesExplicit[1]),
::testing::ValuesIn(inputPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_TestExplicitBroadcast2,
BroadcastLayerTest,
explicitBroadcastParams2,
BroadcastLayerTest::getTestCaseName
);
} // namespace

View File

@ -0,0 +1,85 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "single_layer_tests/detection_output.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Attribute values shared by the 3-input and 5-input DetectionOutput suites.
// Scalars are passed with ::testing::Values, lists with ::testing::ValuesIn,
// so every list contributes one axis to the generated combinations.
const int numClasses = 11;
const int backgroundLabelId = 0;
const std::vector<int> topK = {75};
const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
const std::vector<std::string> codeType = {"caffe.PriorBoxParameter.CORNER", "caffe.PriorBoxParameter.CENTER_SIZE"};
const float nmsThreshold = 0.5f;
const float confidenceThreshold = 0.3f;
const std::vector<bool> clipAfterNms = {true, false};
const std::vector<bool> clipBeforeNms = {true, false};
const std::vector<bool> decreaseLabelId = {true, false};
// Only used by the 5-input suite; the 3-input suite passes 0.0f instead.
const float objectnessScore = 0.4f;
const std::vector<size_t> numberBatch = {1, 2};
const auto commonAttributes = ::testing::Combine(
::testing::Values(numClasses),
::testing::Values(backgroundLabelId),
::testing::ValuesIn(topK),
::testing::ValuesIn(keepTopK),
::testing::ValuesIn(codeType),
::testing::Values(nmsThreshold),
::testing::Values(confidenceThreshold),
::testing::ValuesIn(clipAfterNms),
::testing::ValuesIn(clipBeforeNms),
::testing::ValuesIn(decreaseLabelId)
);
/* =============== 3 inputs cases =============== */
// Eight cases covering every combination of the three leading booleans. The
// last two shape lists are left empty and 0.0f is passed in the objectness
// score position.
// NOTE(review): field meanings of ParamsWhichSizeDepends are declared in
// single_layer_tests/detection_output.hpp — confirm there before editing rows.
const std::vector<ParamsWhichSizeDepends> specificParams3In = {
ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {}, {}},
ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {}, {}},
ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {}, {}},
ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {}, {}},
ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {}, {}},
ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {}, {}},
ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {}, {}},
ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {}, {}}
};
const auto params3Inputs = ::testing::Combine(
commonAttributes,
::testing::ValuesIn(specificParams3In),
::testing::ValuesIn(numberBatch),
::testing::Values(0.0f),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput3In, DetectionOutputLayerTest, params3Inputs, DetectionOutputLayerTest::getTestCaseName);
/* =============== 5 inputs cases =============== */
const std::vector<ParamsWhichSizeDepends> specificParams5In = {
ParamsWhichSizeDepends{true, true, true, 1, 1, {1, 60}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{true, false, true, 1, 1, {1, 660}, {1, 165}, {1, 1, 60}, {1, 30}, {1, 660}},
ParamsWhichSizeDepends{false, true, true, 1, 1, {1, 60}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{false, false, true, 1, 1, {1, 660}, {1, 165}, {1, 2, 60}, {1, 30}, {1, 660}},
ParamsWhichSizeDepends{true, true, false, 10, 10, {1, 60}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{true, false, false, 10, 10, {1, 660}, {1, 165}, {1, 1, 75}, {1, 30}, {1, 660}},
ParamsWhichSizeDepends{false, true, false, 10, 10, {1, 60}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 60}},
ParamsWhichSizeDepends{false, false, false, 10, 10, {1, 660}, {1, 165}, {1, 2, 75}, {1, 30}, {1, 660}}
};
const auto params5Inputs = ::testing::Combine(
commonAttributes,
::testing::ValuesIn(specificParams5In),
::testing::ValuesIn(numberBatch),
::testing::Values(objectnessScore),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(smoke_DetectionOutput5In, DetectionOutputLayerTest, params5Inputs, DetectionOutputLayerTest::getTestCaseName);
} // namespace

View File

@ -16,6 +16,8 @@ std::vector<std::vector<std::vector<size_t>>> inShapes = {
{{1, 10, 100}},
{{4, 4, 16}},
{{1, 1, 1, 3}},
{{2, 17, 5, 4}, {1, 17, 1, 1}},
{{2, 17, 5, 1}, {1, 17, 1, 4}},
{{1, 2, 4}},
{{1, 4, 4}},
{{1, 4, 4, 1}},
@ -40,10 +42,14 @@ std::vector<CommonTestUtils::OpType> opTypes = {
};
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::MULTIPLY,
ngraph::helpers::EltwiseTypes::SUBTRACT,
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::POWER
ngraph::helpers::EltwiseTypes::DIVIDE,
ngraph::helpers::EltwiseTypes::FLOOR_MOD,
ngraph::helpers::EltwiseTypes::SQUARED_DIFF,
ngraph::helpers::EltwiseTypes::POWER,
ngraph::helpers::EltwiseTypes::MOD
};
std::map<std::string, std::string> additional_config = {};
@ -61,4 +67,17 @@ const auto multiply_params = ::testing::Combine(
::testing::Values(additional_config));
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs, EltwiseLayerTest, multiply_params, EltwiseLayerTest::getTestCaseName);
} // namespace
std::vector<std::vector<std::vector<size_t>>> inShapesSingleThread = {
{{1, 2, 3, 4}},
{{2, 2, 2, 2}},
{{2, 1, 2, 1, 2, 2}}
};
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesSingleThread = {
ngraph::helpers::EltwiseTypes::ADD,
ngraph::helpers::EltwiseTypes::POWER,
};
} // namespace

View File

@ -0,0 +1,48 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/fake_quantize.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6}};
const std::vector<std::vector<size_t>> constShapes = {{1}};
const std::vector<size_t> levels = {16, 255, 256};
const std::pair<std::string, std::map<std::string, std::string>> config = {};
const std::vector<float> fqArgs = {};
const std::vector<float> inputParams = {};
const auto fqParams = ::testing::Combine(
::testing::ValuesIn(levels),
::testing::ValuesIn(constShapes),
::testing::Values(fqArgs),
::testing::Values(inputParams)
);
INSTANTIATE_TEST_CASE_P(smoke_FakeQuantize, FakeQuantizeLayerTest,
::testing::Combine(
fqParams,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::Values(config)),
FakeQuantizeLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,129 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/group_convolution_backprop_data.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
// GPU instantiations of GroupConvBackpropDataLayerTest. For both the 2D and the 3D case
// two suites are registered: one with PadType::EXPLICIT and one with PadType::VALID.
namespace {
// Only FP32 networks are exercised by this file.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Output channel counts and group counts shared by the 2D and 3D suites.
const std::vector<size_t> numOutChannels = {16, 32};
const std::vector<size_t> numGroups = {2, 8, 16};
/* ============= 2D GroupConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes2D = {{1, 16, 10, 10},
{1, 32, 10, 10}};
const std::vector<std::vector<size_t >> kernels2D = {{1, 1}, {3, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{0, 0}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};
// 2D operation parameters with explicitly specified (zero) padding.
const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
// 2D operation parameters with PadType::VALID. The pad arguments are spelled out as literal
// zero vectors here instead of reusing padBegins2D/padEnds2D, which hold the same values.
const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::VALID)
);
INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData2D_ExplicitPadding, GroupConvBackpropDataLayerTest,
::testing::Combine(
groupConvBackpropData2DParams_ExplicitPadding,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(inputShapes2D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GroupConvBackpropDataLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData2D_AutoPadValid, GroupConvBackpropDataLayerTest,
::testing::Combine(
groupConvBackpropData2DParams_AutoPadValid,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(inputShapes2D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GroupConvBackpropDataLayerTest::getTestCaseName);
/* ============= 3D GroupConvolutionBackpropData ============= */
const std::vector<std::vector<size_t >> inputShapes3D = {{1, 16, 5, 5, 5},
{1, 32, 5, 5, 5}};
const std::vector<std::vector<size_t >> kernels3D = {{1, 1, 1}, {3, 3, 3}};
const std::vector<std::vector<size_t >> strides3D = {{1, 1, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins3D = {{0, 0, 0}};
const std::vector<std::vector<ptrdiff_t>> padEnds3D = {{0, 0, 0}};
const std::vector<std::vector<size_t >> dilations3D = {{1, 1, 1}};
// 3D operation parameters with explicitly specified (zero) padding.
const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine(
::testing::ValuesIn(kernels3D),
::testing::ValuesIn(strides3D),
::testing::ValuesIn(padBegins3D),
::testing::ValuesIn(padEnds3D),
::testing::ValuesIn(dilations3D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
// 3D operation parameters with PadType::VALID; as in the 2D case the pads are literal
// zero vectors rather than padBegins3D/padEnds3D.
const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine(
::testing::ValuesIn(kernels3D),
::testing::ValuesIn(strides3D),
::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
::testing::Values(std::vector<ptrdiff_t>({0, 0, 0})),
::testing::ValuesIn(dilations3D),
::testing::ValuesIn(numOutChannels),
::testing::ValuesIn(numGroups),
::testing::Values(ngraph::op::PadType::VALID)
);
INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData3D_ExplicitPadding, GroupConvBackpropDataLayerTest,
::testing::Combine(
groupConvBackpropData3DParams_ExplicitPadding,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(inputShapes3D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GroupConvBackpropDataLayerTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_GroupConvBackpropData3D_AutoPadValid, GroupConvBackpropDataLayerTest,
::testing::Combine(
groupConvBackpropData3DParams_AutoPadValid,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::ValuesIn(inputShapes3D),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GroupConvBackpropDataLayerTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,37 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/gru_cell.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
std::vector<bool> should_decompose{false, true};
std::vector<size_t> batch{5};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{1, 30};
std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},
{"tanh", "relu"}};
std::vector<float> clip = {0.0f, 0.7f};
std::vector<bool> linear_before_reset = {true, false};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(GRUCellCommon, GRUCellTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(linear_before_reset),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GRUCellTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,65 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/gru_sequence.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,
ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
ngraph::helpers::SequenceTestsMode::PURE_SEQ};
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{10};
std::vector<size_t> hidden_size{1, 10};
// std::vector<size_t> input_size{10};
std::vector<std::vector<std::string>> activations = {{"relu", "tanh"}, {"tanh", "sigmoid"}, {"sigmoid", "tanh"},
{"tanh", "relu"}};
std::vector<bool> linear_before_reset = {true, false};
std::vector<float> clip{0.f};
std::vector<float> clip_non_zeros{0.7f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
ngraph::op::RecurrentSequenceDirection::REVERSE,
ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL
};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(GRUSequenceCommonZeroClip, GRUSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
// ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(linear_before_reset),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GRUSequenceTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(GRUSequenceCommonClip, GRUSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_clip_non_zero),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
// ::testing::ValuesIn(input_size), // hardcoded to 10 due to Combine supports up to 10 args
::testing::ValuesIn(activations),
::testing::ValuesIn(clip_non_zeros),
::testing::ValuesIn(linear_before_reset),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
GRUSequenceTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,49 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/lstm_cell.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
std::vector<bool> should_decompose{false, true};
std::vector<size_t> batch{5};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> hidden_size_smoke{1};
std::vector<size_t> input_size{1, 30};
std::vector<std::vector<std::string>> activations_smoke = {{"relu", "sigmoid", "tanh"}};
std::vector<std::vector<std::string>> activations = {{"relu", "sigmoid", "tanh"}, {"sigmoid", "tanh", "tanh"},
{"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"},
{"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}};
std::vector<float> clip{0.f, 0.7f};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(LSTMCellCommon, LSTMCellTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
LSTMCellTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_LSTMCellCommon, LSTMCellTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size_smoke),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations_smoke),
::testing::ValuesIn(clip),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
LSTMCellTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,79 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/lstm_sequence.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,
ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
ngraph::helpers::SequenceTestsMode::PURE_SEQ};
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{10};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> hidden_size_smoke{1};
std::vector<size_t> input_size{10};
std::vector<std::vector<std::string>> activations = {{"relu", "sigmoid", "tanh"}, {"sigmoid", "tanh", "tanh"},
{"tanh", "relu", "sigmoid"}, {"sigmoid", "sigmoid", "sigmoid"},
{"tanh", "tanh", "tanh"}, {"relu", "relu", "relu"}};
std::vector<std::vector<std::string>> activations_smoke = {{"relu", "sigmoid", "tanh"}};
std::vector<float> clip{0.f};
std::vector<float> clip_non_zeros{0.7f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
ngraph::op::RecurrentSequenceDirection::REVERSE,
ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL
};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(LSTMSequenceCommonZeroClip, LSTMSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
LSTMSequenceTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(LSTMSequenceCommonClip, LSTMSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_clip_non_zero),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip_non_zeros),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
LSTMSequenceTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCommonClip, LSTMSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_clip_non_zero),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size_smoke),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations_smoke),
::testing::ValuesIn(clip_non_zeros),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
LSTMSequenceTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,42 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/non_max_suppression.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine;
using namespace ngraph;
const std::vector<InputShapeParams> inShapeParams = {
InputShapeParams{3, 100, 5},
InputShapeParams{1, 10, 50},
InputShapeParams{2, 50, 50}
};
const std::vector<int32_t> maxOutBoxPerClass = {5, 20};
const std::vector<float> threshold = {0.3f, 0.7f};
const std::vector<float> sigmaThreshold = {0.0f, 0.5f};
const std::vector<op::v5::NonMaxSuppression::BoxEncodingType> encodType = {op::v5::NonMaxSuppression::BoxEncodingType::CENTER,
op::v5::NonMaxSuppression::BoxEncodingType::CORNER};
const std::vector<bool> sortResDesc = {true, false};
const std::vector<element::Type> outType = {element::i32, element::i64};
const auto nmsParams = ::testing::Combine(::testing::ValuesIn(inShapeParams),
::testing::Combine(::testing::Values(Precision::FP32),
::testing::Values(Precision::I32),
::testing::Values(Precision::FP32)),
::testing::ValuesIn(maxOutBoxPerClass),
::testing::ValuesIn(threshold),
::testing::ValuesIn(threshold),
::testing::ValuesIn(sigmaThreshold),
::testing::ValuesIn(encodType),
::testing::ValuesIn(sortResDesc),
::testing::ValuesIn(outType),
::testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(smoke_NmsLayerTest, NmsLayerTest, nmsParams, NmsLayerTest::getTestCaseName);

View File

@ -35,7 +35,7 @@ const auto normL2params = testing::Combine(
);
INSTANTIATE_TEST_CASE_P(
NormalizeL2,
smoke_NormalizeL2,
NormalizeL2LayerTest,
normL2params,
NormalizeL2LayerTest::getTestCaseName

View File

@ -64,8 +64,8 @@ INSTANTIATE_TEST_CASE_P(smoke_PriorBoxClustered_Basic, PriorBoxClusteredLayerTes
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(std::vector<size_t>({ 1, 16, 4, 4 })),
::testing::Values(std::vector<size_t>({ 1, 3, 50, 50 })),
::testing::Values(std::vector<size_t>({ 4, 4 })),
::testing::Values(std::vector<size_t>({ 50, 50 })),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
PriorBoxClusteredLayerTest::getTestCaseName
);

View File

@ -1,4 +1,4 @@
// Copyright (C) 20120 Intel Corporation
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -10,67 +10,230 @@
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
InferenceEngine::Precision::I32,
InferenceEngine::Precision::U8,
InferenceEngine::Precision::I8,
};
const std::vector<std::vector<size_t>> inputShapes = {
std::vector<size_t>{1, 2, 4, 4},
std::vector<size_t>{3, 2, 5, 6},
};
const std::vector<bool> keepDims = {
true,
false,
};
const std::vector<std::vector<int>> axes = {
const std::vector<std::vector<size_t>> inputShapes = {
std::vector<size_t>{10, 20, 30, 40},
std::vector<size_t>{3, 5, 7, 9},
};
const std::vector<std::vector<size_t>> inputShapesOneAxis = {
std::vector<size_t>{10, 20, 30, 40},
std::vector<size_t>{3, 5, 7, 9},
std::vector<size_t>{10},
};
const std::vector<std::vector<int>> axes = {
{0},
{1},
{2},
{3},
{0, 1},
{0, 2},
{1, 3}
};
{0, 3},
{1, 2},
{1, 3},
{2, 3},
{0, 1, 2},
{0, 1, 3},
{0, 2, 3},
{1, 2, 3},
{0, 1, 2, 3},
{1, -1}
};
std::vector<CommonTestUtils::OpType> opTypes = {
std::vector<CommonTestUtils::OpType> opTypes = {
CommonTestUtils::OpType::SCALAR,
CommonTestUtils::OpType::VECTOR,
};
};
const std::vector<ngraph::helpers::ReductionType> reductionTypes = {
const std::vector<ngraph::helpers::ReductionType> reductionTypes = {
ngraph::helpers::ReductionType::Mean,
ngraph::helpers::ReductionType::Min,
ngraph::helpers::ReductionType::Max,
ngraph::helpers::ReductionType::Sum,
ngraph::helpers::ReductionType::Prod,
};
ngraph::helpers::ReductionType::L1,
ngraph::helpers::ReductionType::L2,
};
const auto paramsOneAxis = testing::Combine(
const std::vector<ngraph::helpers::ReductionType> reductionLogicalTypes = {
ngraph::helpers::ReductionType::LogicalOr,
ngraph::helpers::ReductionType::LogicalAnd
};
const auto paramsOneAxis = testing::Combine(
testing::Values(std::vector<int>{0}),
testing::ValuesIn(opTypes),
testing::Values(true, false),
testing::ValuesIn(reductionTypes),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::FP32),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::ValuesIn(inputShapesOneAxis),
testing::Values(CommonTestUtils::DEVICE_GPU)
);
const auto paramsOneAxisLogical = testing::Combine(
testing::Values(std::vector<int>{0}),
testing::ValuesIn(opTypes),
testing::Values(true, false),
testing::ValuesIn(reductionLogicalTypes),
testing::Values(InferenceEngine::Precision::BOOL),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::ValuesIn(inputShapes),
testing::Values(CommonTestUtils::DEVICE_GPU));
testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
const auto params_Precisions = testing::Combine(
testing::Values(std::vector<int>{1, 3}),
testing::Values(opTypes[1]),
testing::ValuesIn(keepDims),
testing::Values(ngraph::helpers::ReductionType::Sum),
testing::Values(InferenceEngine::Precision::FP32,
InferenceEngine::Precision::I32),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(std::vector<size_t>{2, 2, 2, 2}),
testing::Values(CommonTestUtils::DEVICE_GPU)
);
const auto params_InputShapes = testing::Combine(
testing::Values(std::vector<int>{0}),
testing::Values(opTypes[1]),
testing::ValuesIn(keepDims),
testing::Values(ngraph::helpers::ReductionType::Mean),
testing::Values(InferenceEngine::Precision::FP32),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(std::vector<size_t>{3},
std::vector<size_t>{3, 5},
std::vector<size_t>{2, 4, 6},
std::vector<size_t>{2, 4, 6, 8},
std::vector<size_t>{2, 2, 2, 2, 2},
std::vector<size_t>{2, 2, 2, 2, 2, 2}),
testing::Values(CommonTestUtils::DEVICE_GPU)
);
const auto params_Axes = testing::Combine(
testing::ValuesIn(axes),
testing::Values(opTypes[1]),
testing::ValuesIn(keepDims),
testing::Values(ngraph::helpers::ReductionType::Mean),
testing::Values(InferenceEngine::Precision::FP32),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::ValuesIn(inputShapes),
testing::Values(CommonTestUtils::DEVICE_GPU)
);
const auto params_ReductionTypes = testing::Combine(
testing::Values(std::vector<int>{0, 1, 3}),
testing::Values(opTypes[1]),
testing::ValuesIn(keepDims),
testing::ValuesIn(reductionTypes),
testing::Values(InferenceEngine::Precision::FP32),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(std::vector<size_t>{2, 9, 2, 9}),
testing::Values(CommonTestUtils::DEVICE_GPU)
);
const auto params_ReductionTypesLogical = testing::Combine(
testing::Values(std::vector<int>{0, 1, 3}),
testing::Values(opTypes[1]),
testing::ValuesIn(keepDims),
testing::ValuesIn(reductionLogicalTypes),
testing::Values(InferenceEngine::Precision::BOOL),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(std::vector<size_t>{2, 9, 2, 9}),
testing::Values(CommonTestUtils::DEVICE_GPU)
);
INSTANTIATE_TEST_CASE_P(
smoke_ReduceOneAxis,
ReduceOpsLayerTest,
paramsOneAxis,
ReduceOpsLayerTest::getTestCaseName);
ReduceOpsLayerTest::getTestCaseName
);
const auto params = testing::Combine(
testing::ValuesIn(axes),
testing::Values(opTypes[1]),
testing::Values(true, false),
testing::ValuesIn(reductionTypes),
testing::ValuesIn(netPrecisions),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::ValuesIn(inputShapes),
testing::Values(CommonTestUtils::DEVICE_GPU));
INSTANTIATE_TEST_CASE_P(
smoke_Reduce,
INSTANTIATE_TEST_CASE_P(
smoke_ReduceLogicalOneAxis,
ReduceOpsLayerTest,
params,
ReduceOpsLayerTest::getTestCaseName);
paramsOneAxisLogical,
ReduceOpsLayerTest::getTestCaseName
);
INSTANTIATE_TEST_CASE_P(
smoke_Reduce_Precisions,
ReduceOpsLayerTest,
params_Precisions,
ReduceOpsLayerTest::getTestCaseName
);
INSTANTIATE_TEST_CASE_P(
smoke_Reduce_InputShapes,
ReduceOpsLayerTest,
params_InputShapes,
ReduceOpsLayerTest::getTestCaseName
);
INSTANTIATE_TEST_CASE_P(
smoke_Reduce_Axes,
ReduceOpsLayerTest,
params_Axes,
ReduceOpsLayerTest::getTestCaseName
);
INSTANTIATE_TEST_CASE_P(
smoke_Reduce_ReductionTypes,
ReduceOpsLayerTest,
params_ReductionTypes,
ReduceOpsLayerTest::getTestCaseName
);
INSTANTIATE_TEST_CASE_P(
smoke_ReduceLogical_ReductionTypes,
ReduceOpsLayerTest,
params_ReductionTypesLogical,
ReduceOpsLayerTest::getTestCaseName
);
INSTANTIATE_TEST_CASE_P(
smoke_Reduce,
ReduceOpsLayerWithSpecificInputTest,
testing::Combine(
testing::ValuesIn(decltype(axes) {{0}, {1}}),
testing::Values(opTypes[1]),
testing::Values(true),
testing::Values(ngraph::helpers::ReductionType::Sum),
testing::Values(InferenceEngine::Precision::FP32,
InferenceEngine::Precision::I32),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Precision::UNSPECIFIED),
testing::Values(InferenceEngine::Layout::ANY),
testing::Values(std::vector<size_t> {2, 10}),
testing::Values(CommonTestUtils::DEVICE_GPU)),
ReduceOpsLayerWithSpecificInputTest::getTestCaseName
);
} // namespace

View File

@ -0,0 +1,34 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/rnn_cell.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
std::vector<bool> should_decompose{false, true};
std::vector<size_t> batch{1, 5};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{1, 30};
std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
std::vector<float> clip = {0.f, 0.7f};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16};
INSTANTIATE_TEST_CASE_P(RNNCellCommon, RNNCellTest,
::testing::Combine(
::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
RNNCellTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,60 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <ngraph/op/util/attr_types.hpp>
#include "single_layer_tests/rnn_sequence.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_CONST,
ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_CONST,
ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM,
ngraph::helpers::SequenceTestsMode::PURE_SEQ};
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> seq_lengths_clip_non_zero{20};
std::vector<size_t> batch{1, 10};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{10};
std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
std::vector<float> clip{0.f};
std::vector<float> clip_non_zeros{0.7f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD,
ngraph::op::RecurrentSequenceDirection::REVERSE,
ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL,
};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
INSTANTIATE_TEST_CASE_P(RNNSequenceCommonZeroClip, RNNSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
RNNSequenceTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(RNNSequenceCommonClip, RNNSequenceTest,
::testing::Combine(
::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_clip_non_zero),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip_non_zeros),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
RNNSequenceTest::getTestCaseName);
} // namespace

View File

@ -0,0 +1,46 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <ngraph/opsets/opset3.hpp>
#include "single_layer_tests/scatter_update.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace ngraph::opset3;
namespace {

// Precisions used for the data input.
const std::vector<InferenceEngine::Precision> inputPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16,
    InferenceEngine::Precision::I32,
};

// Precisions used for the indices input.
const std::vector<InferenceEngine::Precision> idxPrecisions = {
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::I64,
};

// map<inputShape, map<indicesShape, axis>>
std::map<std::vector<size_t>, std::map<std::vector<size_t>, std::vector<int>>> axesShapeInShape {
    {
        {10, 16, 12, 15},
        {
            {{2, 4}, {0, 1, 2, 3}},
            {{8}, {-1, -2, -3, -4}}
        }
    },
    {
        {10, 9, 10, 9, 10},
        {
            {{8}, {-3, -1, 0, 2, 4}},
            {{4, 2}, {-2, 2}}
        }
    },
};

// Indices must not be random values. The single index set has 8 entries, which
// matches the element count of every indices shape above (2x4, 8 and 4x2).
const std::vector<std::vector<size_t>> idxValue = {
    {0, 2, 4, 6, 1, 3, 5, 7}
};

const auto ScatterUpdateCase = ::testing::Combine(
    ::testing::ValuesIn(ScatterUpdateLayerTest::combineShapes(axesShapeInShape)),
    ::testing::ValuesIn(idxValue),
    ::testing::ValuesIn(inputPrecisions),
    ::testing::ValuesIn(idxPrecisions),
    ::testing::Values(CommonTestUtils::DEVICE_GPU));

INSTANTIATE_TEST_CASE_P(smoke_ScatterUpdate,
                        ScatterUpdateLayerTest,
                        ScatterUpdateCase,
                        ScatterUpdateLayerTest::getTestCaseName);

}  // namespace

View File

@ -9,8 +9,6 @@
std::vector<std::string> disabledTestPatterns() {
return {
// Issues - 34059
".*BehaviorTests\\.pluginDoesNotChangeOriginalNetwork.*",
//TODO: Issue: 34748
R"(.*(ComparisonLayerTest).*)",
// TODO: Issue: 39014
@ -20,8 +18,6 @@ std::vector<std::string> disabledTestPatterns() {
// Expected behavior
R"(.*EltwiseLayerTest.*eltwiseOpType=Pow.*netPRC=I64.*)",
R"(.*EltwiseLayerTest.*IS=\(.*\..*\..*\..*\..*\).*eltwiseOpType=Pow.*secondaryInputType=CONSTANT.*)",
// TODO: Issue: 40958
R"(.*(ConstantResultSubgraphTest).*)",
// TODO: Issue: 43794
R"(.*(PreprocessTest).*(SetScalePreProcess).*)",
R"(.*(PreprocessTest).*(ReverseInputChannelsPreProcess).*)",
@ -35,8 +31,23 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*TopKLayerTest.*k=10.*mode=min.*sort=index.*)",
R"(.*TopKLayerTest.*k=5.*sort=(none|index).*)",
// TODO: Issue: 43511
R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*OpType=(Prod|Sub).*secondaryInputType=CONSTANT_opType=VECTOR_netPRC=(FP16|FP32).*)",
R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*OpType=Sum.*secondaryInputType=CONSTANT_opType=VECTOR_netPRC=(FP16|FP32).*)",
R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*OpType=Sub.*secondaryInputType=CONSTANT_opType=VECTOR_netPRC=I64.*)",
R"(.*EltwiseLayerTest.*IS=\(1.4.3.2.1.3\).*)",
R"(.*EltwiseLayerTest.*IS=\(2\).*OpType=Mod.*opType=VECTOR.*)",
R"(.*EltwiseLayerTest.*OpType=FloorMod.*netPRC=I64.*)",
// These tests might fail because the accuracy loss is slightly larger than the threshold
R"(.*(GRUCellTest).*)",
R"(.*(RNNSequenceTest).*)",
R"(.*(GRUSequenceTest).*)",
// These test cases might fail due to FP16 overflow
R"(.*(LSTM).*activations=\(relu.*netPRC=FP16.*)",
// Need to update activation primitive to support any broadcastable constant to enable these cases.
R"(.*ActivationParamLayerTest.*)",
// Unknown issues
R"(.*(LSTMSequence).*mode=CONVERT_TO_TI_RAND_SEQ_LEN.*)",
R"(.*(smoke_DetectionOutput3In).*)",
R"(.*(smoke_DetectionOutput5In).*)",
R"(.*(ScatterUpdateLayerTest).*)",
};
}

View File

@ -0,0 +1,16 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "subgraph_tests/parameter_result.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace SubgraphTestsDefinitions;
namespace {

// Runs the Parameter->Result subgraph test on the GPU device only.
INSTANTIATE_TEST_CASE_P(
    smoke_Check,
    ParameterResultSubgraphTest,
    ::testing::Values(CommonTestUtils::DEVICE_GPU),
    ParameterResultSubgraphTest::getTestCaseName);

}  // namespace

View File

@ -0,0 +1,15 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "shared_test_classes/subgraph/parameter_result.hpp"
namespace SubgraphTestsDefinitions {

// Parameterized test body: delegates entirely to the fixture's Run().
TEST_P(ParameterResultSubgraphTest, CompareWithRefs) {
    Run();
}

}  // namespace SubgraphTestsDefinitions

View File

@ -67,7 +67,6 @@ protected:
float offset;
bool clip;
std::vector<std::vector<std::uint8_t>> CalculateRefs() override;
void SetUp() override;
};

View File

@ -0,0 +1,28 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <string>
#include <vector>
#include <memory>
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
namespace SubgraphTestsDefinitions {

// Test parameters: a one-element tuple carrying the target device name.
using parameterResultParams = std::tuple<
    std::string  // Device name
>;

// Fixture for a subgraph test parameterized only by device name.
class ParameterResultSubgraphTest : public testing::WithParamInterface<parameterResultParams>,
                                    virtual public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces the test-case name for a given parameter set.
    static std::string getTestCaseName(testing::TestParamInfo<parameterResultParams> obj);

protected:
    // Prepares the fixture before each test run.
    void SetUp() override;
};

}  // namespace SubgraphTestsDefinitions

View File

@ -57,84 +57,8 @@ std::string PriorBoxClusteredLayerTest::getTestCaseName(const testing::TestParam
return result.str();
}
// Computes the reference PriorBoxClustered output on the host.
//
// The result is built as one float buffer with two equally sized halves:
//   * first half  - one (xmin, ymin, xmax, ymax) box per (h, w, prior), with
//                   coordinates divided by the image width/height;
//   * second half - the `variances` values repeated for every box.
// Returns a one-element vector holding the raw bytes of that float buffer,
// which is the form the test utilities expect for reference data.
//
// Side effect: when the `variances` member is empty, a default of 0.1f is
// appended to it before it is used.
std::vector<std::vector<std::uint8_t>> PriorBoxClusteredLayerTest::CalculateRefs() {
    const size_t numPriors = widths.size();
    const size_t layerWidth = inputShapes[3];
    const size_t layerHeight = inputShapes[2];
    const size_t imgWidth = imageShapes[3];
    const size_t imgHeight = imageShapes[2];

    if (variances.empty())
        variances.push_back(0.1f);
    const size_t varSize = variances.size();

    const size_t topDataOffset = 4 * layerWidth * layerHeight * numPriors;
    // std::vector value-initialises its elements, so the whole buffer already
    // starts as 0.0f; no device-specific zero fill is required.
    std::vector<float> outBuf(2 * topDataOffset);
    float* topData_0 = outBuf.data();
    float* topData_1 = outBuf.data() + topDataOffset;

    // Fall back to steps derived from the image/layer ratio only when both
    // configured steps are zero.
    float stepW = step_width;
    float stepH = step_height;
    if (stepW == 0 && stepH == 0) {
        stepW = static_cast<float>(imgWidth) / layerWidth;
        stepH = static_cast<float>(imgHeight) / layerHeight;
    }

    for (size_t h = 0; h < layerHeight; ++h) {
        for (size_t w = 0; w < layerWidth; ++w) {
            const float center_x = (w + offset) * stepW;
            const float center_y = (h + offset) * stepH;

            for (size_t s = 0; s < numPriors; ++s) {
                const float box_width = widths[s];
                const float box_height = heights[s];

                float xmin = (center_x - box_width / 2.0f) / imgWidth;
                float ymin = (center_y - box_height / 2.0f) / imgHeight;
                float xmax = (center_x + box_width / 2.0f) / imgWidth;
                float ymax = (center_y + box_height / 2.0f) / imgHeight;

                if (clip) {
                    // Clamp every coordinate into [0, 1].
                    xmin = (std::min)((std::max)(xmin, 0.0f), 1.0f);
                    ymin = (std::min)((std::max)(ymin, 0.0f), 1.0f);
                    xmax = (std::min)((std::max)(xmax, 0.0f), 1.0f);
                    ymax = (std::min)((std::max)(ymax, 0.0f), 1.0f);
                }

                // Linear index of this box: ((h * layerWidth) + w) * numPriors + s.
                const size_t boxIdx = (h * layerWidth + w) * numPriors + s;

                float* box = topData_0 + boxIdx * 4;
                box[0] = xmin;
                box[1] = ymin;
                box[2] = xmax;
                box[3] = ymax;

                // NOTE(review): the second half reserves 4 floats per box, so this
                // write pattern assumes varSize <= 4 - confirm against callers.
                for (size_t j = 0; j < varSize; ++j)
                    topData_1[boxIdx * varSize + j] = variances[j];
            }
        }
    }

    // Be aligned with the test utils reference calculation method, which returns
    // std::vector<std::vector<uint8_t>>: expose the float buffer as raw bytes.
    // (Same bytes as copying 4 bytes per float, wherever sizeof(float) == 4.)
    const auto* rawBytes = reinterpret_cast<const std::uint8_t*>(outBuf.data());
    std::vector<std::vector<std::uint8_t>> ret(1);
    ret[0].assign(rawBytes, rawBytes + outBuf.size() * sizeof(float));
    return ret;
}
void PriorBoxClusteredLayerTest::SetUp() {
SetRefMode(LayerTestsUtils::RefMode::CONSTANT_FOLDING);
priorBoxClusteredSpecificParams specParams;
std::tie(specParams, netPrecision,
inPrc, outPrc, inLayout, outLayout,
@ -149,9 +73,7 @@ void PriorBoxClusteredLayerTest::SetUp() {
variances) = specParams;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto paramsIn = ngraph::builder::makeParams(ngPrc, { inputShapes, imageShapes });
auto paramsOut = ngraph::helpers::convert2OutputVector(
ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(paramsIn));
auto params = ngraph::builder::makeParams(ngPrc, { inputShapes, imageShapes });
ngraph::op::PriorBoxClusteredAttrs attributes;
attributes.widths = widths;
@ -162,12 +84,14 @@ void PriorBoxClusteredLayerTest::SetUp() {
attributes.offset = offset;
attributes.variances = variances;
auto priorBoxClustered = std::make_shared<ngraph::op::PriorBoxClusteredIE>(
paramsOut[0],
paramsOut[1],
auto shape_of_1 = std::make_shared<ngraph::opset3::ShapeOf>(params[0]);
auto shape_of_2 = std::make_shared<ngraph::opset3::ShapeOf>(params[1]);
auto priorBoxClustered = std::make_shared<ngraph::op::PriorBoxClustered>(
shape_of_1,
shape_of_2,
attributes);
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(priorBoxClustered) };
function = std::make_shared<ngraph::Function>(results, paramsIn, "PB_Clustered");
function = std::make_shared<ngraph::Function>(results, params, "PB_Clustered");
}
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,28 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/subgraph/parameter_result.hpp"
namespace SubgraphTestsDefinitions {
// Builds the test-case name "TargetDevice=<device>" from the single tuple
// element of the test parameters.
std::string ParameterResultSubgraphTest::getTestCaseName(testing::TestParamInfo<parameterResultParams> obj) {
    const std::string& deviceName = std::get<0>(obj.param);
    return "TargetDevice=" + deviceName;
}
// Reads the target device from the test parameters and builds the function
// under test: a single f32 Parameter of shape {1, 3, 10, 10} wired directly
// into a Result, named "ParameterResult".
void ParameterResultSubgraphTest::SetUp() {
    std::tie(targetDevice) = this->GetParam();

    const auto parameter = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::Type_t::f32,
                                                                      ngraph::Shape{1, 3, 10, 10});
    const ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(parameter)};
    const ngraph::ParameterVector params = {parameter};

    function = std::make_shared<ngraph::Function>(results, params, "ParameterResult");
}
} // namespace SubgraphTestsDefinitions

View File

@ -6,16 +6,7 @@
set(TARGET_NAME ClDnnFunctionalTests)
file(GLOB CLDNN_TEST_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/regression_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/single_layer_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/io_blob_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/input_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/inference_engine_regression_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/lstm/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/common_single_layer_tests/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/ie_class/*.cpp
${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instance/single_layer_tests/*.cpp)
${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
list(APPEND TEST_SRC ${CLDNN_TEST_SOURCES})

View File

@ -0,0 +1,3 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

Some files were not shown because too many files have changed in this diff Show More