publish master branch snapshot, revision 8d31237e2c3f673cbb0f0ba110fc10f5cce1d2bb
parent eab7ef4895
commit deb008a26f
13	.gitignore	vendored
@@ -16,6 +16,7 @@ build/
.gdb_history
.vimspector.json
doc/
!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
@@ -56,3 +57,15 @@ __pycache__
/model-optimizer/*.mapping
/model-optimizer/*.dat
/model-optimizer/*.svg

# ngraph
ngraph/src/CPackConfig.cmake
ngraph/src/CPackSourceConfig.cmake
ngraph/src/VERSION
ngraph/src/gtest/
ngraph/src/json/
ngraph/src/ngraphConfig.cmake
ngraph/src/ngraphConfigVersion.cmake
ngraph/src/protobuf/
ngraph/src/src/
ngraph/src/test/
@@ -64,12 +64,11 @@ endmacro()

macro(ie_cpack)
    set(CPACK_GENERATOR "TGZ")
    string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
    if(WIN32)
        set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
        string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
    else()
        set(CPACK_PACKAGE_NAME inference-engine)
        string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
    endif()
    set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
    set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
@@ -159,6 +159,17 @@ if(ENABLE_PYTHON)
            COMPONENT python_samples)
endif()

# install speech demo files

if(SPEECH_LIBS_AND_DEMOS)
    ie_cpack_add_component(speech_demo_files REQUIRED)

    install(DIRECTORY ${TEMP}/deployment_tools
                      ${TEMP}/data_processing
            DESTINATION .
            COMPONENT speech_demo_files)
endif()

#
# Developer package
#
@@ -57,7 +57,7 @@ add_subdirectory (src/openvino/inference_engine)

# Check Cython version
if("${CYTHON_VERSION}" VERSION_LESS "0.29")
    message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}")
    message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
    message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
@@ -58,6 +58,6 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )

# Find Cython version
execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")

mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
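The regex change above matters because `cython -V` can report a two-component version such as "Cython version 0.29": the old pattern demanded major.minor.patch and never matched it, while the new one makes the patch component optional. A minimal standalone C++ sketch of the two patterns (illustration only, not part of the commit; note that CMake's REGEX REPLACE leaves the input unchanged when nothing matches):

#include <iostream>
#include <regex>
#include <string>

int main() {
    // Output as printed by "cython -V" for a release without a patch component.
    const std::string output = "Cython version 0.29";
    const std::regex old_re("^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*");    // requires major.minor.patch
    const std::regex new_re("^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*"); // patch is optional

    std::smatch m;
    std::cout << "old pattern matches: " << std::regex_match(output, m, old_re) << "\n";  // prints 0
    if (std::regex_match(output, m, new_re))
        std::cout << "new pattern captures: " << m[1] << "\n";                            // prints 0.29
    return 0;
}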
@@ -23,6 +23,7 @@ foreach(PYX_FILE ${OTHER_SOURCES})
    get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
    set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
    cython_add_module(${PYX_NAME} ${PYX_FILE})
    add_dependencies(${TARGET_NAME} ${PYX_NAME})
    target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
    target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
endforeach()
@@ -3,6 +3,7 @@ import os
import pytest
import warnings
import threading
from datetime import datetime

from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
@@ -195,11 +196,25 @@ def test_async_infer_wait_finish(device):
def test_async_infer_wait_time(device):
    ie_core = ie.IECore()
    net = ie_core.read_network(test_net_xml, test_net_bin)
    exec_net = ie_core.load_network(net, device, num_requests=1)
    exec_net = ie_core.load_network(net, device, num_requests=2)
    img = read_image()
    request = exec_net.requests[0]
    request.async_infer({'data': img})
    request.wait(100)
    start_time = datetime.utcnow()
    status = request.wait(ie.WaitMode.RESULT_READY)
    assert status == ie.StatusCode.OK
    time_delta = datetime.utcnow() - start_time
    latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000)
    timeout = max(100, latency_ms)
    request = exec_net.requests[1]
    request.async_infer({'data': img})
    max_repeat = 10
    status = ie.StatusCode.REQUEST_BUSY
    i = 0
    while i < max_repeat and status != ie.StatusCode.OK:
        status = request.wait(timeout)
        i += 1
    assert status == ie.StatusCode.OK
    res = request.output_blobs['fc_out'].buffer
    assert np.argmax(res) == 2
    del exec_net
@@ -100,6 +100,9 @@ static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
                                    " in case of one input size.";

// @brief message for quantization bits
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";

/// @brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);

@@ -184,6 +187,9 @@ DEFINE_string(dump_config, "", dump_config_message);
/// @brief Define flag for input shape <br>
DEFINE_string(shape, "", shape_message);

/// @brief Define flag for quantization bits (default 16)
DEFINE_int32(qb, 16, gna_qb_message);

/**
 * @brief This function show a help message
 */
@@ -221,4 +227,5 @@ static void showUsage() {
    std::cout << "    -dump_config " << dump_config_message << std::endl;
    std::cout << "    -load_config " << load_config_message << std::endl;
#endif
    std::cout << "    -qb " << gna_qb_message << std::endl;
}
@@ -13,6 +13,7 @@
#include <inference_engine.hpp>
#include <vpu/vpu_plugin_config.hpp>
#include <cldnn/cldnn_config.hpp>
#include <gna/gna_config.hpp>
#include <samples/common.hpp>
#include <samples/slog.hpp>
#include <samples/args_helper.hpp>
@@ -274,6 +275,14 @@ int main(int argc, char *argv[]) {
            }
        } else if (device == "MYRIAD") {
            device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
        } else if (device == "GNA") {
            if (FLAGS_qb == 8)
                device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
            else
                device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";

            if (isFlagSetInCommandLine("nthreads"))
                device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
        }
    }

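The new -qb flag only selects between the plugin's I8 and I16 weight precisions; the quantization itself happens inside the GNA plugin. As a rough illustration of what the weight-bit choice implies, here is a hedged sketch of symmetric per-tensor weight quantization (simplified, not the plugin's actual routine):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hedged sketch: quantize weights symmetrically to the bit width selected by a
// flag like -qb (8 or 16). Fewer bits mean a coarser grid over the same range.
std::vector<int32_t> quantize_weights(const std::vector<float>& w, int bits) {
    const int32_t qmax = (bits == 8) ? INT8_MAX : INT16_MAX;
    float max_abs = 0.f;
    for (float v : w) max_abs = std::max(max_abs, std::abs(v));
    const float scale = (max_abs > 0.f) ? qmax / max_abs : 1.f;
    std::vector<int32_t> q;
    q.reserve(w.size());
    for (float v : w) {
        const float clamped = std::min(std::max(v * scale, float(-qmax)), float(qmax));
        q.push_back(static_cast<int32_t>(std::lround(clamped)));
    }
    return q;
}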
@@ -24,6 +24,7 @@
#include "details/caseless.hpp"
#include <details/ie_cnn_network_tools.h>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>
#include <generic_ie.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
@@ -73,7 +74,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
    std::shared_ptr<ICNNNetwork> clonedNetwork(nullptr);
    if (network.getFunction()) {
        const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) != nullptr;
            return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
                   std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
        };
        CNNNetwork net(network.getFunction());
        auto nGraphFunc = net.getFunction();
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
#include "weights_converter.hpp"
#include "layer_transform.hpp"

namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant<QuantI8> {
    }
};

template <typename T>
inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
    auto prec_blob = InferenceEngine::make_shared_blob<T>({ precision,
        fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
    prec_blob->allocate();

    int i = 0;
    for (auto& precValue : *prec_blob) {
        auto f32Value = fp32_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()[i++] * scale_factor;
        if (f32Value > std::numeric_limits<T>::max()) {
            precValue = std::numeric_limits<T>::max();
        } else if (f32Value < std::numeric_limits<T>::min()) {
            precValue = std::numeric_limits<T>::min();
        } else {
            precValue = static_cast<T>(f32Value);
        }
    }

    return static_cast<InferenceEngine::Blob::Ptr>(prec_blob);
}

inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
    InferenceEngine::Blob::Ptr result_ptr = nullptr;
    switch (precision) {
    case InferenceEngine::Precision::FP32:
        result_ptr = fp32_to_precision_blob<float>(fp32_blob, precision, scale_factor);
        break;
    case InferenceEngine::Precision::I32:
        result_ptr = fp32_to_precision_blob<int32_t>(fp32_blob, precision, scale_factor);
        break;
    case InferenceEngine::Precision::I16:
        result_ptr = fp32_to_precision_blob<int16_t>(fp32_blob, precision, scale_factor);
        break;
    case InferenceEngine::Precision::I8:
        result_ptr = fp32_to_precision_blob<int8_t>(fp32_blob, precision, scale_factor);
        break;
    default:
        THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
    }
    return result_ptr;
}

template<class QuantDesc, class QuantFunc>
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
    InferenceEngine::WeightableLayer *wl,
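One caveat worth noting about the clamping in fp32_to_precision_blob above: std::numeric_limits<T>::min() is the most negative value only for integer T; for float it is the smallest positive normal number, so in the FP32 branch of the dispatcher negative inputs would be clamped up to FLT_MIN rather than passed through. A hedged sketch of a saturating cast that avoids that pitfall (illustration only, not the commit's code):

#include <cstdint>
#include <limits>

// Hedged sketch of a saturating float-to-T cast. lowest() is the correct lower
// bound for both integer and floating-point T; min() is not, because for
// floating-point types it denotes the smallest positive normal value.
template <typename T>
T saturate_cast(float v) {
    if (v > static_cast<float>(std::numeric_limits<T>::max()))
        return std::numeric_limits<T>::max();
    if (v < static_cast<float>(std::numeric_limits<T>::lowest()))
        return std::numeric_limits<T>::lowest();
    return static_cast<T>(v);
}

// Usage: saturate_cast<int16_t>(1.0e6f) == 32767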
@@ -389,6 +433,18 @@ class DataQuantizer<Desc, InferenceEngine::CNNLayer *> : public DataQuantizerBas
        }
        cnnLayer->precision = Desc::mandatory().getInputPrecision();

        if (cnnLayer->type == "Const") {
            if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
                cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
            }
            auto const_scale_factor = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer)->_dst_quant.scale;
            auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
            auto const_blob = cnnLayer->blobs["custom"];
            if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
                cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
            }
        }

        return true;
    }
};
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
            return true;
        }

        if (cnnLayer->type == "Const") {
            auto blob = cnnLayer->blobs["custom"];
            if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
                blob = make_fp32_blob(blob);
            }
            auto max_val = std::numeric_limits<float>::min();
            auto min_val = std::numeric_limits<float>::max();

            auto flt_buf = blob->buffer().as<float*>();
            auto size = blob->size();

            for (int i=0; i < size; i++) {
                auto val = flt_buf[i];
                if (val > max_val) max_val = val;
                if (val < min_val) min_val = val;
            }

            auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
            auto scale_val = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_val;

            // TODO: Investigate what should be the scale in such cases (31910)
            if (std::isinf(scale_val)) {
                quant->_dst_quant.scale = quant->_src_quant.scale;
            } else {
                quant->_dst_quant.scale = scale_val;
            }

            return ScaleFactorUpdateResult();
        }

        if (!CNNNetHasPrevLayer(cnnLayer)) {
            quant->_dst_quant.scale = quant->_src_quant.scale;
            return ScaleFactorUpdateResult();
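The new Const branch derives the layer's output scale from the dynamic range of the constant data: the largest representable int16 value divided by the largest absolute value in the blob. When the blob is all zeros that division yields +inf, hence the std::isinf fallback to the source scale. A condensed restatement (illustrative, assuming a plain float array as input):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>
#include <vector>

// Hedged sketch of the scale-factor rule above: scale = INT16_MAX / max|v|,
// falling back when the blob contains only zeros (division by zero -> +inf).
float const_scale_factor(const std::vector<float>& values, float fallback) {
    float abs_max = 0.f;
    for (float v : values) abs_max = std::max(abs_max, std::abs(v));
    const float scale = static_cast<float>(std::numeric_limits<int16_t>::max()) / abs_max;
    return std::isinf(scale) ? fallback : scale;
}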
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {

        auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
        auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);

        auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);

        switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
            quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
            break;
        }
        case InferenceEngine::EltwiseLayer::Sub:
        case InferenceEngine::EltwiseLayer::Sum: {
            // detect which input will be used as biases
            if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
            }

            // this path might result in significant data loss
            quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
            quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
            quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;

@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"

inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
    auto fp32_blob = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
        fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
    fp32_blob->allocate();

    int i = 0;
    for (auto& f32Value : *fp32_blob) {
        auto f16Value = fp16_blob->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
        f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
    }

    return static_cast<InferenceEngine::Blob::Ptr>(fp32_blob);
}

inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
    InferenceEngine::BlobMap newBlobs;
    for (auto& blob : lp->blobs) {
        if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
            THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
        }
        auto tmp =
            InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32,
                blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
        tmp->allocate();
        int i = 0;
        for (auto& f32Value : *tmp) {
            auto f16Value = blob.second->buffer().template as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type*>()[i++];
            f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
        }
        newBlobs[blob.first] = tmp;
        auto fp32_blob = make_fp32_blob(blob.second);
        newBlobs[blob.first] = fp32_blob;
    }
    lp->_biases = newBlobs["biases"];
    lp->_weights = newBlobs["weights"];
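make_fp32_blob centralizes the FP16-to-FP32 widening that was previously open-coded per blob; the per-value conversion is delegated to PrecisionUtils::f16tof32. For reference, a self-contained sketch of what such a half-to-float conversion does (hypothetical helper, not the library routine):

#include <cmath>
#include <cstdint>

// Hypothetical stand-in for InferenceEngine::PrecisionUtils::f16tof32:
// expand an IEEE-754 binary16 value stored in a uint16_t to float.
float half_to_float(uint16_t h) {
    const uint32_t sign = (h >> 15) & 1u;
    const uint32_t exp  = (h >> 10) & 0x1Fu;
    const uint32_t frac = h & 0x3FFu;
    float result;
    if (exp == 0) {
        result = std::ldexp(static_cast<float>(frac), -24);       // zero or subnormal
    } else if (exp == 31) {
        result = frac ? NAN : INFINITY;                           // NaN or infinity
    } else {
        result = std::ldexp(static_cast<float>(frac | 0x400u),
                            static_cast<int>(exp) - 25);          // normal: (1 + frac/1024) * 2^(exp-15)
    }
    return sign ? -result : result;
}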
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
    for (auto& dataItem : lp->outData) {
        dataItem->setPrecision(InferenceEngine::Precision::FP32);
    }
    InferenceEngine::BlobMap newBlobs;
    for (auto& blob_pair : lp->blobs) {
        auto blob_name = blob_pair.first;
        auto blob_ptr = blob_pair.second;
        if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
            auto new_blob = make_fp32_blob(blob_ptr);
            newBlobs[blob_name] = new_blob;
        } else {
            newBlobs[blob_name] = blob_ptr;
        }
    }

    return true;
}

@@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
    if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
        THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
    }
    auto constBlob = constLayer->blobs["custom"];
    auto const_blob = constLayer->blobs["custom"];

    void* ptr_for_const_blob = &ptr_for_const_blob;
    connectOutput(constLayer, ptr_for_const_blob, constBlob->size());

    const_connections[constLayer->name] = ptr_for_const_blob;
    const_connections[constLayer->name] = &const_connections[constLayer->name];
    void* ptr_for_const_blob = &const_connections[constLayer->name];

    connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
    // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
    // dont see practical use case when bind storage type need to be different that allocation type
    gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
        ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
    gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
        ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
    });
}

@@ -602,15 +601,35 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
    if (cropLayer == nullptr) {
        return;
    }
    if (cropLayer->axis.size() > 1) {

    IE_ASSERT(!layer->insData.empty());
    auto inputs = layer->insData.begin()->lock();

    IE_ASSERT(!cropLayer->axis.empty());
    IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size());
    IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());

    std::vector<int> axis, dim, offset;
    for (int n = 0; n < cropLayer->axis.size(); n++) {
        uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
        // Exclude crop layer components that do nothing
        if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
            continue;
        }
        axis.push_back(cropLayer->axis[n]);
        dim.push_back(cropLayer->dim[n]);
        offset.push_back(cropLayer->offset[n]);
    }

    if (axis.size() > 1) {
        THROW_GNA_EXCEPTION <<
            "Crop layer does not support the number of cropped dimensions = "
            << cropLayer->axis.size() << ".";
            "Crop layer does not support the number of (non-trivial) cropped dimensions more than 1, provided: "
            << axis.size() << ".";
    }

    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
    size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
    size_t cropOffset = offset.front() * cropLayer->precision.size();
    size_t cropOutputSize = dim.front() * cropLayer->precision.size();

    if (ALIGN64(cropOffset) == cropOffset) {
        // leave crop as it is
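The refactor above first discards crop components that select an entire axis (offset 0 and size equal to the input dimension), so the single-axis restriction only applies to axes that actually crop. Restated in isolation (simplified indexing; the real code resolves dimensions through FROM_IR_DIM, which indexes from the back):

#include <cstddef>
#include <vector>

// Hedged sketch of the filtering step: drop no-op crop components before
// enforcing the one-cropped-axis limit. CropSpec is an illustrative type.
struct CropSpec { std::vector<int> axis, dim, offset; };

CropSpec drop_trivial_components(const CropSpec& in, const std::vector<int>& input_dims) {
    CropSpec out;
    for (std::size_t n = 0; n < in.axis.size(); ++n) {
        if (in.offset[n] == 0 && in.dim[n] == input_dims[in.axis[n]])
            continue;  // this component keeps the whole axis; nothing is cropped
        out.axis.push_back(in.axis[n]);
        out.dim.push_back(in.dim[n]);
        out.offset.push_back(in.offset[n]);
    }
    return out;
}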
@@ -637,20 +656,18 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
    } else {
        gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
        IE_ASSERT(!layer->outData.empty());
        IE_ASSERT(!layer->insData.empty());
        auto outputs = *layer->outData.begin();
        auto inputs = layer->insData.begin()->lock();

        // only 1D crops supported
        if (cropLayer->axis.size() != 1) {
        if (axis.size() != 1) {
            THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name;
        }

        // TODO: add unit tests for 4d crops blobs
        uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]);
        uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
        uint32_t num_columns_in = 1;

        uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]);
        uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
        uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;

        void* ptr_inputs = nullptr;
@@ -686,7 +703,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
        connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
        connectOutput(layer, ptr_outputs, num_data_bytes_out);

        FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
        FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);

        (quantized == nullptr) ?
            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
@@ -713,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
    int biasesLayerIdx = 1;

    if (quantized) {
        if (eltwise._operation == EltwiseLayer::Sum) {
        switch (eltwise._operation) {
        case InferenceEngine::EltwiseLayer::Sum:
        case InferenceEngine::EltwiseLayer::Sub:
        {
            if (inputs4Bytes->getPrecision().size() != 4) {
                std::swap(inputs4Bytes, inputs2Bytes);
                biasesLayerIdx = 0;
            }
            GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
            GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
        } else {
            break;
        }
        case InferenceEngine::EltwiseLayer::Prod:
        {
            // for mul both inputs should be 2 bytes precision
            GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
            GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
            break;
        }
        default:
            THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
        }
    }

@@ -767,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
    connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);

    switch (eltwise._operation) {
    case EltwiseLayer::Sub:
        if (quantized == nullptr) {
            gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
        } else {
            auto scaledIdentity = -quantized->_weights_quant.scale;

            auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));

            gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
        }
        connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
        break;
    case EltwiseLayer::Sum:
        if (quantized == nullptr) {
            gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
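The new Sub case reuses the Sum lowering: GNA has no native elementwise subtract, so the second operand is fed through "weights" of -1 (or the negated weight scale, saturated to int16, on the quantized path) and then summed. In scalar form the idea is simply:

#include <cstddef>
#include <vector>

// Hedged sketch of subtraction expressed as a weighted sum, the scheme the
// Sub case above implements in hardware terms (weights of -1 on operand b).
std::vector<float> eltwise_sub(const std::vector<float>& a, const std::vector<float>& b) {
    const float weight = -1.0f;  // quantized path: -weights_scale, saturated to int16
    std::vector<float> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
        out[i] = a[i] + weight * b[i];
    return out;
}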
@@ -248,8 +248,6 @@ void Config::AdjustKeyMapValues() {
    key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
        gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
    key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name();
    key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
        gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
    key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] =
        gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO;
    key_config_map[CONFIG_KEY(PERF_COUNT)] =
@@ -153,6 +153,15 @@ class LayerInfo {
        return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
            InferenceEngine::EltwiseLayer::Sum;
    }
    bool isEltwiseSub() const noexcept {
        IS_VALID();
        if (!isEltwise()) return false;
        // dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer) is validated in isEltwise function
        // coverity[var_deref_op]
        return dynamic_cast<const InferenceEngine::EltwiseLayer *>(layer)->_operation ==
            InferenceEngine::EltwiseLayer::Sub;
    }

    bool isEltwiseMul() const noexcept {
        IS_VALID();
        if (!isEltwise()) return false;
@@ -150,6 +150,7 @@ static std::vector<CNNLayerPtr> getCandidatesForIdentityInsertion(const CNNLayer
    auto prev1 = PrevFunctionalLayer(l, 1);

    switch (eltwise->_operation) {
    case EltwiseLayer::Sub:
    case EltwiseLayer::Sum:
        if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
            return prevLayers;
@@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() {
        // for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
        // for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights

        if (eltwise->_operation != EltwiseLayer::Sum)
        if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
            continue;

        auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
@@ -392,7 +392,7 @@ std::shared_ptr<ngraph::Node> V10Parser::createNode(const std::vector<ngraph::Ou
    auto blobs = node.child("blobs");
    if (!blobs.empty()) {
        for (pugi::xml_node blob = blobs.first_child(); !blob.empty(); blob = blob.next_sibling()) {
            size_t size = GetUIntAttr(blob, "size", 0);
            size_t size = GetUInt64Attr(blob, "size", 0);
            uint64_t offset = GetUInt64Attr(blob, "offset", 0);
            Precision precision(Precision::U8);
            const std::string& preStr = GetStrAttr(blob, "precision", "");
@@ -787,7 +787,7 @@ std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::LSTMCell>::cre
    std::vector<float> activations_beta = getParameters<float>(dn, "activations_beta", {});
    float clip = GetFloatAttr(dn, "clip", 0.f);
    return std::make_shared<ngraph::op::LSTMCell>(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5],
        GetUIntAttr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
        GetUInt64Attr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
        activations, activations_alpha, activations_beta, clip);
}

@@ -1365,8 +1365,8 @@ std::shared_ptr<ngraph::Node> V10Parser::LayerCreator<ngraph::op::Constant>::cre
    if (dn.empty())
        THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;

    size_t offset = GetUIntAttr(dn, "offset");
    size_t size = GetUIntAttr(dn, "size");
    size_t offset = GetUInt64Attr(dn, "offset");
    size_t size = GetUInt64Attr(dn, "size");

    if (!weights || weights->cbuffer() == nullptr)
        THROW_IE_EXCEPTION << "Cannot read network! The model requires weights data! "
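These GetUIntAttr to GetUInt64Attr switches matter for models whose weights file exceeds 4 GiB: a 32-bit read of the blob "offset" and "size" attributes silently wraps around. A tiny illustration of the truncation:

#include <cstdint>
#include <iostream>

// Hedged illustration of why the parser moves to a 64-bit attribute reader:
// values above UINT32_MAX are silently truncated by a 32-bit parse.
int main() {
    const uint64_t blob_offset = 5000000000ull;  // an offset past the 4 GiB mark
    const uint32_t truncated = static_cast<uint32_t>(blob_offset);
    std::cout << blob_offset << " -> " << truncated << "\n";  // 5000000000 -> 705032704
    return 0;
}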
@@ -166,12 +166,12 @@ private:
class XmlDeserializer : public ngraph::AttributeVisitor {
public:
    explicit XmlDeserializer(const pugi::xml_node& node): node(node) {}
    void on_attribute(const std::string& name, std::string& value) override {
    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& value) override {
        std::string val;
        if (!getStrAttribute(node.child("data"), name, val)) return;
        value = val;
        value.set(val);
    }
    void on_attribute(const std::string& name, bool& value) override {
    void on_adapter(const std::string& name, ngraph::ValueAccessor<bool>& value) override {
        std::string val;
        if (!getStrAttribute(node.child("data"), name, val)) return;
        std::transform(val.begin(), val.end(), val.begin(), [](char ch) {
@@ -184,7 +184,7 @@ private:
        bool is_false = false_names.find(val) != false_names.end();

        if (!is_true && !is_false) return;
        value = is_true;
        value.set(is_true);
    }
    void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override {
        std::string val;
@@ -63,12 +63,8 @@ public:

    CNNLayerPtr create();

    void on_attribute(const std::string& name, std::string& value) override {
        params[name] = value;
    }

    void on_attribute(const std::string& name, bool& value) override {
        params[name] = value ? "true" : "false";
    void on_adapter(const std::string& name, ::ngraph::ValueAccessor<bool> &value) override {
        params[name] = value.get() ? "true" : "false";
    }

    void addSpecificCreator(const std::vector<std::string>& forTypes, const CreatorFor& creator) {
@@ -417,6 +413,15 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
        res->params = params;
        return res;
    });

    addSpecificCreator({"StaticShapeTopK"}, [](const std::shared_ptr<::ngraph::Node>& node,
                                               const std::map<std::string, std::string> params) -> CNNLayerPtr {
        LayerParams attrs = {node->get_friendly_name(), "TopK",
                             details::convertPrecision(node->get_output_element_type(0))};
        auto res = std::make_shared<TopKLayer>(attrs);
        res->params = params;
        return res;
    });
}

CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
@@ -530,7 +535,6 @@ std::shared_ptr<CNNNetworkImpl> convertFunctionToICNNNetwork(const std::shared_p
        std::make_shared<Builder::NodeConverter<::ngraph::op::Sign>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::Sinh>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::SquaredDifference>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Select>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Softmax>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::v1::Split>>(),
        std::make_shared<Builder::NodeConverter<::ngraph::op::VariadicSplit>>(),
@@ -16,6 +16,7 @@
#include <string>
#include <vector>
#include <mutex>
#include <algorithm>

#include <cnn_network_ngraph_impl.hpp>
#include "blob_factory.hpp"
@@ -228,6 +229,12 @@ std::vector<CNNLayerPtr> ConstTransformer::foldConstSubgraphsInternal(const std:
    return remainingConstLayers;
}

static std::vector<std::string> skipConstInfer = {
    "FakeQuantize",
    "Quantize",
    "CumSum" // Const inference function for CumSum is not implemented!
};

const std::map<std::string, bool> ConstTransformer::getConstLayers(const std::vector<CNNLayerPtr>& sortedLayers) {
    std::map<std::string, bool> mapConstLayers;
    // collect all const layers, which inputs are const layers.
@@ -235,7 +242,7 @@ const std::map<std::string, bool> ConstTransformer::getConstLayers(const std::ve
        // Layers with "Shape" and "Const" type are Const by definition
        if (layer->type == "Shape" || layer->type == "Const") {
            mapConstLayers[layer->name] = false;
        } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) {
        } else if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end() && !isForFakeQuantzie(*layer)) {
            bool isAllInputsConst = true;
            for (auto const& data : layer->insData) {
                auto creator = data.lock()->getCreatorLayer().lock();
@@ -336,7 +343,7 @@ const BlobMap ConstTransformer::getConstData(const std::map<std::string, bool>&
    };

    for (const auto& layer : sortedLayers) {
        if (layer->type == "FakeQuantize" || layer->type == "Quantize") {
        if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) != skipConstInfer.end()) {
            continue;
        }

@@ -346,13 +353,13 @@ const BlobMap ConstTransformer::getConstData(const std::map<std::string, bool>&

        auto implPtr = holder.getConstInferImpl(layer->type);
        if (!implPtr && !isForShape)
            if (layer->type != "FakeQuantize" && layer->type != "Quantize")
            if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
                THROW_IE_EXCEPTION << "Failed to find reference implementation for `" + layer->name +
                    "` Layer with `" + layer->type + "` Type on constant propagation";
        if (!isForShape) {
            auto outputBlobs = getOutputBlobs(layer->outData);
            auto inp = getInputBlobs(layer->insData, isForShape);
            if (layer->type != "FakeQuantize" && layer->type != "Quantize")
            if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
                implPtr->infer(inp, layer->params, layer->blobs, outputBlobs);
            for (int i = 0; i < layer->outData.size(); i++) {
                std::string dataName = layer->outData[i]->getName();
@@ -1381,24 +1381,6 @@ CNNLayer::Ptr NodeConverter<ngraph::op::SquaredDifference>::createLayer(const st
    return res;
}

template <>
CNNLayer::Ptr NodeConverter<ngraph::op::v1::Select>::createLayer(const std::shared_ptr<ngraph::Node>& layer) const {
    LayerParams params = {layer->get_friendly_name(), "Select", details::convertPrecision(layer->get_output_element_type(0))};

    auto res = std::make_shared<InferenceEngine::CNNLayer>(params);
    auto castedLayer = ngraph::as_type_ptr<ngraph::op::v1::Select>(layer);
    if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;

    auto broadcast = castedLayer->get_auto_broadcast().m_type;
    if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) {
        res->params["auto_broadcast"] = "numpy";
    } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) {
        res->params["auto_broadcast"] = "none";
    }

    return res;
}

template <>
CNNLayer::Ptr NodeConverter<ngraph::op::DetectionOutput>::createLayer(
    const std::shared_ptr<ngraph::Node>& layer) const {
@@ -4,6 +4,10 @@

set(TARGET_NAME "MKLDNNPlugin")

if(ENABLE_LTO)
    ie_enable_lto()
endif()

if (WIN32)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
endif()
@@ -41,6 +45,7 @@ set(LAYERS
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_resample_node.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp

    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
@@ -93,6 +98,7 @@ set(LAYERS
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp
)

foreach(LAYER ${LAYERS})
@@ -51,7 +51,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>

    memory::primitive_desc fetch() const {
        memory::primitive_desc adesc;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;

        cdesc = mkldnn_primitive_desc_iterator_fetch(get());

@@ -72,7 +72,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc src_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -86,7 +86,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc dst_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -101,7 +101,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc diff_src_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -115,7 +115,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc weights_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -129,7 +129,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>
    memory::primitive_desc diff_dst_primitive_desc(size_t index = 0) const {
        memory::primitive_desc adesc;
        memory::primitive_desc cdesc_elem;
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;
        cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
        const_mkldnn_primitive_desc_t const_cdesc =
            mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -152,7 +152,7 @@ struct primitive_desc_iterator : public handle<mkldnn_primitive_desc_iterator_t>

    template <typename T>
    void getPrimitiveDescriptor(T& pdesc) const {
        mkldnn_primitive_desc_t cdesc;
        mkldnn_primitive_desc_t cdesc = nullptr;

        memory::primitive_desc cdescpd;

@@ -151,7 +151,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
    auto inputDesc = getInputDesc();
    auto outputDesc = getOutputDesc();
    if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) ||
        (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
        (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 &&
         (inputDesc.getPrecision() != outputDesc.getPrecision() ||
          inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc())))
        THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats.";
    if (inputDesc.getLayout() == InferenceEngine::Layout::ANY)
        THROW_IE_EXCEPTION << "Cannot get input descriptor!";
@@ -1,22 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "mkldnn_layers_dispatcher.hpp"
#include <details/ie_exception.hpp>
#include "nodes/list.hpp"
#include <memory>

using namespace InferenceEngine;

namespace MKLDNNPlugin {

void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr) {
    if (!mngr)
        THROW_IE_EXCEPTION << "Cannot add default extensions! Extension manager is empty.";

    auto defaultExtensions = std::make_shared<Extensions::Cpu::MKLDNNExtensions>();
    mngr->AddExtension(defaultExtensions);
}

} // namespace MKLDNNPlugin
@@ -1,13 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "mkldnn_extension_mngr.h"

namespace MKLDNNPlugin {

void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr);

} // namespace MKLDNNPlugin
@@ -23,11 +23,14 @@ MKLDNNMemory::MKLDNNMemory(const engine& eng) : eng(eng) {}

size_t MKLDNNMemory::GetSize() const {
    uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType()));
    return GetElementsCount() * itemSize;
}

size_t MKLDNNMemory::GetElementsCount() const {
    auto desc = GetDescriptor();
    std::vector<int> dims(desc.data.layout_desc.blocking.padding_dims,
                          desc.data.layout_desc.blocking.padding_dims + desc.data.ndims);
    return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>()) * itemSize;
    return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>());
}

void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory::format format, const void* data) {
@@ -182,6 +185,7 @@ bool MKLDNNMemory::isConsistant(memory::dims dims, memory::format format) {
        case f::OhIw16o4i:
        case f::OIhw4i16o4i:
        case f::OhIw8o4i:
        case f::IOhw16o16i:
            ndims = 4; break;
        // DHW
        case f::ncdhw:
@@ -411,6 +415,7 @@ std::string MKLDNNMemory::formatToString(memory::format fmt) {
        case memory::OhIw8o4i: return "OhIw8o4i";
        case memory::OhIw16o4i: return "OhIw16o4i";
        case memory::OIhw4i16o4i: return "OIhw4i16o4i";
        case memory::IOhw16o16i: return "IOhw16o16i";

        case memory::oidhw: return "oidhw";
        case memory::dhwio: return "dhwio";
@@ -718,6 +723,33 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OIhw8o8i:
            order = {0, 1, 2, 3, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
            blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
            blkDims.push_back(8);
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::OIhw16o16i:
            order = {0, 1, 2, 3, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::IOhw16o16i:
            order = {1, 0, 2, 3, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OIdhw8i8o:
            order = {0, 1, 2, 3, 4, 1, 0};
            blkDims = dims;
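Each new blocked-layout case repeats one idiom: divide the outer channel count by the block size, rounding up, then append the block sizes to the blocked dims. The rounding idiom in isolation:

#include <cstddef>

// Hedged sketch: the blkDims arithmetic above is a ceiling division, padding a
// channel count up to a whole number of 8- or 16-channel blocks.
inline std::size_t ceil_div(std::size_t x, std::size_t block) {
    return x / block + (x % block ? 1 : 0);
}

// e.g. ceil_div(17, 16) == 2: 17 output channels occupy two 16-channel blocks.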
@@ -736,8 +768,26 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OIdhw8o8i:
            order = {0, 1, 2, 3, 4, 1, 0};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
            blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
            blkDims.push_back(8);
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::OIdhw16o16i:
            order = {0, 1, 2, 3, 4, 0, 1};
            blkDims = dims;
            blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw4o4i:
            order = {0, 1, 2, 3, 4, 2, 1};
            order = {0, 1, 2, 3, 4, 1, 2};
            blkDims = dims;
            blkDims[1] = blkDims[1] / 4 + (blkDims[1] % 4 ? 1 : 0);
            blkDims[2] = blkDims[2] / 4 + (blkDims[2] % 4 ? 1 : 0);
@@ -754,6 +804,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw8o8i:
            order = {0, 1, 2, 3, 4, 1, 2};
            blkDims = dims;
            blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
            blkDims[2] = blkDims[2] / 8 + (blkDims[2] % 8 ? 1 : 0);
            blkDims.push_back(8);
            blkDims.push_back(8);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw16i16o:
            order = {0, 1, 2, 3, 4, 2, 1};
            blkDims = dims;
@@ -763,6 +822,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::gOIhw16o16i:
            order = {0, 1, 2, 3, 4, 1, 2};
            blkDims = dims;
            blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
            blkDims[2] = blkDims[2] / 16 + (blkDims[2] % 16 ? 1 : 0);
            blkDims.push_back(16);
            blkDims.push_back(16);
            layout = Layout::BLOCKED;
            break;
        case memory::OhIw8o4i:
            order = {0, 2, 1, 3, 0, 1};
            blkDims = dims;
@@ -1067,6 +1135,16 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
        } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
            mkldnnFormat = memory::format::OIhw16i16o;
        }
    } else if (order.size() == 6 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
        if (blkdDims[4] == 8 && blkdDims[5] == 8) {
            mkldnnFormat = memory::format::OIhw8o8i;
        } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
            mkldnnFormat = memory::format::OIhw16o16i;
        }
    } else if (order.size() == 6 && order[0] == 1 && order[1] == 0 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
        if (blkdDims[4] == 16 && blkdDims[5] == 16) {
            mkldnnFormat = memory::format::IOhw16o16i;
        }
    } else if (order.size() == 5 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0) {
        if (blkdDims[4] == 8) {
            mkldnnFormat = memory::format::Ohwi8o;
@@ -1122,6 +1200,13 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::OIdhw16i16o;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
        if (blkdDims[6] == 8) {
            mkldnnFormat = memory::format::OIdhw8o8i;
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::OIdhw16o16i;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 2 && order[2] == 3 && order[3] == 1 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
        if (blkdDims[5] == 8) {
@@ -1136,12 +1221,21 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 2 && order[6] == 1) {
        if (blkdDims[6] == 4) {
            mkldnnFormat = memory::format::gOIhw4o4i;
            mkldnnFormat = memory::format::gOIhw4i4o;
        } else if (blkdDims[6] == 8) {
            mkldnnFormat = memory::format::gOIhw8i8o;
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::gOIhw16i16o;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
        if (blkdDims[6] == 4) {
            mkldnnFormat = memory::format::gOIhw4o4i;
        } else if (blkdDims[6] == 8) {
            mkldnnFormat = memory::format::gOIhw8o8i;
        } else if (blkdDims[6] == 16) {
            mkldnnFormat = memory::format::gOIhw16o16i;
        }
    } else if (order.size() == 7 &&
               order[0] == 0 && order[1] == 1 && order[2] == 3 && order[3] == 2 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
        if (blkdDims[5] == 8 && blkdDims[6] == 4) {
@@ -87,6 +87,7 @@ public:
    }

    size_t GetSize() const;
    size_t GetElementsCount() const;

    mkldnn::memory::format GetFormat() const {
        return static_cast<mkldnn::memory::format>(prim->get_primitive_desc().desc().data.format);
@@ -1017,12 +1017,17 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() {
    auto config = selected_pd->getConfig();
    if (!isInitConfig(config)) {
        for (size_t i = 0; i < config.inConfs.size(); i++) {
            config.inConfs[i].desc = getConfiguredInputDesc(config, i);
            // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes offset field.
            // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values.
            config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i));
        }

        for (size_t i = 0; i < config.outConfs.size(); i++) {
            config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
            // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes offset field.
            // What's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values.
            config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i));
        }

        initDescriptor(config);
    } else if (getType() != RNNSeq && getType() != RNNCell) {
        initDescriptor(config);
@@ -370,7 +370,10 @@ public:
        if (srcDescs.empty() || selectedDescs.empty())
            return false;
        for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
            if (srcDescs[i] != selectedDescs[i].desc && srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
            if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() &&
                  srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() &&
                  srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) &&
                srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
                return false;
        }
        return true;
@@ -5,7 +5,6 @@
#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_layers_dispatcher.hpp"
#include "mkldnn_weights_cache.hpp"
#include <cpp_interfaces/base/ie_plugin_base.hpp>
#include <threading/ie_executor_manager.hpp>
@@ -15,6 +14,7 @@
#include <tuple>
#include <ie_system_conf.h>
#include <generic_ie.hpp>
#include <nodes/list.hpp>

#include "convert_function_to_cnn_network.hpp"
#include <transformations/common_optimizations/common_optimizations.hpp>
@@ -23,6 +23,7 @@
#include <transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/op/fused/gelu.hpp>

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@@ -40,7 +41,7 @@ using namespace InferenceEngine;

Engine::Engine() {
    _pluginName = "CPU";
    addDefaultExtensions(extensionManager);
    extensionManager->AddExtension(std::make_shared<Extensions::Cpu::MKLDNNExtensions>());
}

Engine::~Engine() {
@@ -83,7 +84,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
    const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
        return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
               std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
               std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node);
               std::dynamic_pointer_cast<const ::ngraph::opset2::SpaceToBatch>(node) ||
               std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node);
    };
    auto nGraphFunc = clonedNetwork->getFunction();
    // Disable shape inference (WA for generic operations)
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include "argmax_imp.hpp"
@@ -49,7 +48,7 @@ private:
    argmax_conf conf;
};

REG_FACTORY_FOR(ImplFactory<ArgMaxImpl>, ArgMax);
REG_FACTORY_FOR(ArgMaxImpl, ArgMax);

} // namespace Cpu
} // namespace Extensions
@@ -4,6 +4,7 @@

#include "argmax_imp.hpp"

#include <cstring>
#include <algorithm>
#include <string>
#include <vector>
@@ -181,7 +182,7 @@ void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape
    vmask_type vmask;
    int s_index = i0 * dim * after_num + ib1 * block_size;

    memset(reinterpret_cast<void*>(&vmax_values[0]), 0, sizeof(vmax_values));
    std::memset(reinterpret_cast<void*>(&vmax_values[0]), 0, sizeof(vmax_values));

    auto vswap_func = [&](int index1, int index2) {
        vtmp = vmax_values[index1];
@@ -6,7 +6,7 @@

#include <ie_iextension.h>
#include "ie_util_internal.hpp"
#include "list.hpp"
#include "nodes/list.hpp"

#include <string>
#include <vector>
@@ -176,6 +176,21 @@ protected:

IE_SUPPRESS_DEPRECATED_END

template <typename __prim>
inline void extRegister(MKLDNNExtensions * extInstance, const char * __type) {
    IE_SUPPRESS_DEPRECATED_START
    extInstance->AddExt(__type,
        [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
            return new __prim(layer);
        });
    IE_SUPPRESS_DEPRECATED_END
}

#define REG_FACTORY_FOR(__prim, __type) \
void __prim ## __type(MKLDNNExtensions * extInstance) { \
    extRegister<ImplFactory<__prim>>(extInstance, #__type); \
}

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
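The reworked REG_FACTORY_FOR takes the implementation class directly, with the ImplFactory wrapping moved inside the macro; that is why the call sites in the node files below shrink from REG_FACTORY_FOR(ImplFactory<X>, Y) to REG_FACTORY_FOR(X, Y). A reduced sketch of the pattern (stand-in types; the real macro registers into an MKLDNNExtensions instance):

#include <functional>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-ins for the real CNNLayer / factory types.
struct Layer { std::string name; };
struct FactoryBase { virtual ~FactoryBase() = default; };
template <class Impl>
struct ImplFactory : FactoryBase {
    explicit ImplFactory(const Layer*) {}
};

using Creator = std::function<std::unique_ptr<FactoryBase>(const Layer*)>;
static std::map<std::string, Creator> g_registry;  // stand-in for the extension instance

template <class Factory>
void extRegister(const char* type) {
    g_registry[type] = [](const Layer* l) { return std::unique_ptr<FactoryBase>(new Factory(l)); };
}

// The macro folds the ImplFactory wrapping in, so call sites name only the impl class.
#define REG_FACTORY_FOR(Impl, Type) \
    void Impl##Type() { extRegister<ImplFactory<Impl>>(#Type); }

struct ArgMaxImpl { explicit ArgMaxImpl(const Layer*) {} };
REG_FACTORY_FOR(ArgMaxImpl, ArgMax)  // defines ArgMaxImplArgMax(), registering "ArgMax"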
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -219,7 +218,7 @@ private:
    std::vector<size_t> _crops_end;
};

REG_FACTORY_FOR(ImplFactory<BatchToSpaceImpl>, BatchToSpace);
REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -129,7 +128,7 @@ private:
    const size_t BROADCAST_SHAPE = 1;
};

REG_FACTORY_FOR(ImplFactory<BroadcastImpl>, Broadcast);
REG_FACTORY_FOR(BroadcastImpl, Broadcast);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -123,7 +122,7 @@ private:
    bool with_bins = false;
};

REG_FACTORY_FOR(ImplFactory<BucketizeImpl>, Bucketize);
REG_FACTORY_FOR(BucketizeImpl, Bucketize);

} // namespace Cpu
} // namespace Extensions

@ -14,8 +14,16 @@ namespace Cpu {

#if defined(HAVE_AVX512F)
namespace AVX512F {
    static inline __m512 _mm_uni_any_ps() {
        return __m512{};
    }

    static inline __m512i _mm_uni_any_epi32() {
        return __m512i{};
    }

    static inline __m512 _mm_uni_loadu_ps(const float* psrc) {
        return _mm512_loadu_ps(psrc);
        return _mm512_mask_loadu_ps(_mm_uni_any_ps(), (__mmask16)-1, psrc);
    }

    static inline void _mm_uni_storeu_ps(float* pdst, const __m512& vec) {

@ -62,8 +70,12 @@ namespace AVX512F {
        return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1)));
    }

    static inline __m512i _mm_uni_set1_epi32(int value) {
        return _mm512_mask_set1_epi32(_mm_uni_any_epi32(), (__mmask16)-1, value);
    }

    static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) {
        return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1);
        return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm_uni_set1_epi32(0)), vec0, vec1);
    }

    static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) {

@ -90,10 +102,6 @@ namespace AVX512F {
        return _mm512_add_epi32(vec0, vec1);
    }

    static inline __m512i _mm_uni_set1_epi32(int value) {
        return _mm512_set1_epi32(value);
    }

    static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) {
        return _mm512_sll_epi32(vec, _mm_set1_epi64x(value));
    }

@ -119,7 +127,7 @@ namespace AVX512F {
    }

    static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) {
        return _mm512_cvtepi32_ps(vec);
        return _mm512_mask_cvtepi32_ps(_mm_uni_any_ps(), (__mmask16)-1, vec);
    }
} // namespace AVX512F
#elif defined(HAVE_AVX2)
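A note on the masked intrinsic forms introduced above: (__mmask16)-1 is the all-lanes mask 0xFFFF, so the pass-through value coming from _mm_uni_any_ps() / _mm_uni_any_epi32() is never selected and the masked calls behave exactly like their unmasked counterparts. A minimal self-check of that equivalence (assumes an AVX-512F capable compiler and CPU; build with -mavx512f):

    #include <immintrin.h>
    #include <cassert>

    int main() {
        alignas(64) float src[16];
        for (int i = 0; i < 16; ++i) src[i] = static_cast<float>(i);

        const __mmask16 all = (__mmask16)-1;                  // 0xFFFF: every lane selected
        __m512 a = _mm512_loadu_ps(src);                      // plain load
        __m512 b = _mm512_mask_loadu_ps(__m512{}, all, src);  // masked load, full mask

        alignas(64) float ra[16], rb[16];
        _mm512_storeu_ps(ra, a);
        _mm512_storeu_ps(rb, b);
        for (int i = 0; i < 16; ++i) assert(ra[i] == rb[i]);  // identical lane for lane
        return 0;
    }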
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -129,7 +128,7 @@ private:
    std::string precision;
};

REG_FACTORY_FOR(ImplFactory<ConvertImpl>, Convert);
REG_FACTORY_FOR(ConvertImpl, Convert);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -86,7 +85,7 @@ public:
    }
};

REG_FACTORY_FOR(ImplFactory<CTCGreedyDecoderImpl>, CTCGreedyDecoder);
REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder);

} // namespace Cpu
} // namespace Extensions

230
inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
Normal file
@ -0,0 +1,230 @@

// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <string>
#include <vector>
#include "ie_parallel.hpp"
#include "ie_precision.hpp"

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

class CumSumImpl: public ExtLayerBase {
    enum { CUM_SUM_DATA, AXIS, numOfInputs };
    enum { N, C, D, H, W, numOfDims };
    bool exclusive;
    bool reverse;
    size_t axis = 0;
    std::vector<size_t> shape5d;

public:
    explicit CumSumImpl(const CNNLayer* layer) {
        try {
            layerName = layer->name;
            if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1)
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!";

            const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc();
            const auto &dataShape = dataTensor.getDims();
            if (dataShape.size() < 1 || dataShape.size() > 5) {
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size();
            }

            exclusive = layer->GetParamAsBool("exclusive", false);
            reverse = layer->GetParamAsBool("reverse", false);

            const auto& dataPrecision = dataTensor.getPrecision();
            if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
                    dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name();

            if (layer->insData.size() == numOfInputs) {
                const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc();
                const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
                if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64)
                    THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name();

                const auto axisTensorRank = axisTensor.getDims().size();
                if (axisTensorRank != 0)
                    THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank;
            }

            if (dataShape != layer->outData[0]->getTensorDesc().getDims())
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions";

            shape5d = get5dShape(dataShape);

            LayerConfig config;
            for (size_t i = 0; i < layer->insData.size(); i++) {
                DataConfig inConfig;
                inConfig.inPlace = -1;
                inConfig.constant = false;

                Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision();
                if (inPrecision == Precision::BF16)
                    inPrecision = Precision::FP32;
                const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims();
                inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));

                config.inConfs.push_back(inConfig);
            }
            DataConfig outConfig;
            outConfig.inPlace = -1;
            outConfig.constant = false;
            Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision();
            if (outPrecision == Precision::BF16)
                outPrecision = Precision::FP32;
            const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
            outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));

            config.outConfs.push_back(outConfig);

            config.dynBatchSupport = false;
            confs.push_back(config);
        } catch (InferenceEngine::details::InferenceEngineException &ex) {
            errorMsg = ex.what();
        }
    }

    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
        if (inputs.size() == numOfInputs)
            axis = getAxis(inputs[AXIS], inputs[CUM_SUM_DATA]);

        const auto &dataPrecision = inputs[CUM_SUM_DATA]->getTensorDesc().getPrecision();
        switch (dataPrecision) {
            case Precision::I8   : { execImpl<int8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::U8   : { execImpl<uint8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::I16  : { execImpl<int16_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::I32  : { execImpl<int32_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::FP32 : { execImpl<float>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::I64  : { execImpl<int64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            case Precision::U64  : { execImpl<uint64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
            default : {
                if (resp) {
                    std::string errorMsg = "CumSum layer with name '" + layerName + "' has unsupported 'data' input precision: " + dataPrecision.name();
                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
                }
                return GENERAL_ERROR;
            }
        }
        return OK;
    }

private:
    template <typename dataType>
    void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) {
        const auto *input = _input->cbuffer().as<const dataType *>() + _input->getTensorDesc().getBlockingDesc().getOffsetPadding();
        auto *output = _output->buffer().as<dataType *>() + _output->getTensorDesc().getBlockingDesc().getOffsetPadding();
        const size_t offset = _input->getTensorDesc().getBlockingDesc().getStrides()[axis];

        if (reverse) {
            if (exclusive) {
                cumSum<true, true, dataType>(input, output, offset);
            } else {
                cumSum<true, false, dataType>(input, output, offset);
            }
        } else {
            if (exclusive) {
                cumSum<false, true, dataType>(input, output, offset);
            } else {
                cumSum<false, false, dataType>(input, output, offset);
            }
        }
    }

    template <bool reverse, bool exclusive, typename dataType>
    void cumSum(const dataType *input, dataType *output, const size_t &offset) {
        std::vector<size_t> iterationRange(numOfDims - 1);
        size_t j = 0;
        for (size_t i = 0; i < shape5d.size(); i++) {
            if (i == axis)
                continue;
            iterationRange[j++] = shape5d[i];
        }
        parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) {
            std::vector<size_t> forStartOffset;
            forStartOffset.push_back(ir0); forStartOffset.push_back(ir1); forStartOffset.push_back(ir2); forStartOffset.push_back(ir3);
            forStartOffset.insert(forStartOffset.begin() + axis, 0);
            size_t startOffset = getStartOffset(forStartOffset);

            const dataType *inputStart = input + startOffset;
            dataType *outputStart = output + startOffset;

            if (reverse) {
                if (exclusive) {
                    outputStart[offset*(shape5d[axis] - 1)] = 0;
                    for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
                        outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset];
                    }
                } else {
                    outputStart[offset*(shape5d[axis] - 1)] = inputStart[offset*(shape5d[axis] - 1)];
                    for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
                        outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset];
                    }
                }
            } else {
                if (exclusive) {
                    outputStart[0] = 0;
                    for (size_t i = 1; i < shape5d[axis]; i++) {
                        outputStart[i*offset] = inputStart[(i-1)*offset] + outputStart[(i-1)*offset];
                    }
                } else {
                    outputStart[0] = inputStart[0];
                    for (size_t i = 1; i < shape5d[axis]; i++) {
                        outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset];
                    }
                }
            }
        });
    }

    size_t getStartOffset(std::vector<size_t> &forStartOffset) {
        return forStartOffset[N]*shape5d[C]*shape5d[D]*shape5d[H]*shape5d[W] + forStartOffset[C]*shape5d[D]*shape5d[H]*shape5d[W] +
               forStartOffset[D]*shape5d[H]*shape5d[W] + forStartOffset[H]*shape5d[W] + forStartOffset[W];
    }

    size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) {
        const auto& axisPrecision = _axis->getTensorDesc().getPrecision();
        const int64_t dataShapeSize = static_cast<int64_t>(_data->getTensorDesc().getDims().size());
        int64_t axisValueFromBlob;
        switch (axisPrecision) {
            case Precision::I32 : {
                const auto *axisPtr = _axis->cbuffer().as<const int32_t *>();
                axisValueFromBlob = static_cast<int64_t>(axisPtr[0]);
                break;
            }
            case Precision::I64 : {
                const auto *axisPtr = _axis->cbuffer().as<const int64_t *>();
                axisValueFromBlob = axisPtr[0];
                break;
            }
            default : {
                THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input with precision: " << axisPrecision.name();
            }
        }
        if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1)
            THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has axis with a value out of range: " << axisValueFromBlob;
        return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize);
    }

    std::vector<size_t> get5dShape(const SizeVector& dims) {
        std::vector<size_t> shape5d(numOfDims, 1);
        for (size_t i = 0; i < dims.size(); i++)
            shape5d[i] = dims[i];
        return shape5d;
    }

private:
    std::string layerName;
};

REG_FACTORY_FOR(CumSumImpl, CumSum);

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
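To make the reverse/exclusive template parameters above concrete, here is the same set of four loops reduced to a standalone 1-D case (axis stride fixed at 1); expected outputs are noted in the comments.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    template <bool reverse, bool exclusive>
    void cumSum1d(const float* in, float* out, size_t n) {
        if (reverse) {
            if (exclusive) {               // running sum of everything to the right
                out[n - 1] = 0;
                for (int64_t i = static_cast<int64_t>(n) - 2; i >= 0; i--)
                    out[i] = in[i + 1] + out[i + 1];
            } else {                       // suffix sums including the element itself
                out[n - 1] = in[n - 1];
                for (int64_t i = static_cast<int64_t>(n) - 2; i >= 0; i--)
                    out[i] = in[i] + out[i + 1];
            }
        } else {
            if (exclusive) {               // running sum of everything to the left
                out[0] = 0;
                for (size_t i = 1; i < n; i++)
                    out[i] = in[i - 1] + out[i - 1];
            } else {                       // ordinary prefix sums
                out[0] = in[0];
                for (size_t i = 1; i < n; i++)
                    out[i] = in[i] + out[i - 1];
            }
        }
    }

    int main() {
        std::vector<float> in{1, 2, 3, 4}, out(4);
        cumSum1d<false, false>(in.data(), out.data(), 4);  // 1 3 6 10
        cumSum1d<false, true >(in.data(), out.data(), 4);  // 0 1 3 6
        cumSum1d<true,  false>(in.data(), out.data(), 4);  // 10 9 7 4
        cumSum1d<true,  true >(in.data(), out.data(), 4);  // 9 7 4 0
        return 0;
    }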
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -118,7 +117,7 @@ private:
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cfloat>

@ -604,7 +603,7 @@ void DetectionOutputImpl::nms_mx(const float* conf_data,
    }
}

REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput);
REG_FACTORY_FOR(DetectionOutputImpl, DetectionOutput);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cassert>

@ -392,7 +391,7 @@ private:


REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
REG_FACTORY_FOR(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -121,7 +120,7 @@ private:
    const size_t FILL_VALUE = 1;
};

REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
REG_FACTORY_FOR(FillImpl, Fill);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -148,7 +147,7 @@ private:
};


REG_FACTORY_FOR(ImplFactory<GatherImpl>, Gather);
REG_FACTORY_FOR(GatherImpl, Gather);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -150,7 +149,7 @@ private:
    InferenceEngine::Precision precision;
};

REG_FACTORY_FOR(ImplFactory<GatherTreeImpl>, GatherTree);
REG_FACTORY_FOR(GatherTreeImpl, GatherTree);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -58,7 +57,7 @@ private:
    float bias = 1.0f;
};

REG_FACTORY_FOR(ImplFactory<GRNImpl>, GRN);
REG_FACTORY_FOR(GRNImpl, GRN);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <string>
#include <vector>

@ -434,7 +433,7 @@ private:
    }
};

REG_FACTORY_FOR(ImplFactory<InterpImpl>, Interp);
REG_FACTORY_FOR(InterpImpl, Interp);

} // namespace Cpu
} // namespace Extensions

29
inference-engine/src/mkldnn_plugin/nodes/list.cpp
Normal file
@ -0,0 +1,29 @@

// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "nodes/list.hpp"

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

#define FACTORY_DECLARATION(__prim, __type) \
    void __prim ## __type(MKLDNNExtensions * extInstance)

#define FACTORY_CALL(__prim, __type) \
    __prim ## __type(this)

#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_DECLARATION(__prim, __type)
# include "list_tbl.hpp"
#undef MKLDNN_EXTENSION_NODE

MKLDNNExtensions::MKLDNNExtensions() {
#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_CALL(__prim, __type)
# include "list_tbl.hpp"
#undef MKLDNN_EXTENSION_NODE
}

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
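The two includes of list_tbl.hpp above are a classic X-macro: the same table expands once into function declarations and once into calls from the constructor. A minimal sketch of the pattern, with a hypothetical two-entry table inlined in place of the real header:

    #include <cstdio>

    struct Extensions;  // forward declaration; stands in for MKLDNNExtensions

    #define NODE_TABLE(X) \
        X(FooImpl, Foo)   \
        X(BarImpl, Bar)

    // Pass 1: declare void FooImplFoo(Extensions*) and void BarImplBar(Extensions*).
    #define DECLARE(__prim, __type) void __prim ## __type(Extensions* ext);
    NODE_TABLE(DECLARE)
    #undef DECLARE

    struct Extensions {
        Extensions() {
            // Pass 2: call every registration function exactly once, in table order.
    #define CALL(__prim, __type) __prim ## __type(this);
            NODE_TABLE(CALL)
    #undef CALL
        }
    };

    // Definitions would normally live next to each node implementation.
    void FooImplFoo(Extensions*) { std::puts("registered Foo"); }
    void BarImplBar(Extensions*) { std::puts("registered Bar"); }

    int main() { Extensions e; return 0; }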
@ -12,17 +12,6 @@

#include <memory>
#include <algorithm>

// WA for xbyak.h
#ifdef _WIN32
# ifndef _WINSOCKAPI_
# define _WINSOCKAPI_
# endif
# ifndef _WINSOCK2API_
# define _WINSOCK2API_
# endif
#endif
#include <cpu_isa_traits.hpp>

namespace InferenceEngine {
namespace Extensions {
namespace Cpu {

@ -37,14 +26,16 @@ struct ExtensionsHolder {

class MKLDNNExtensions : public IExtension {
public:
    MKLDNNExtensions();

    StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override {
        collectTypes(types, size, MKLDNNExtensions::GetExtensionsHolder()->list);
        collectTypes(types, size, extensionsHolder->list);
        return OK;
    }

    StatusCode
    getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept override {
        auto& factories = MKLDNNExtensions::GetExtensionsHolder()->list;
        auto& factories = extensionsHolder->list;
        if (factories.find(cnnLayer->type) == factories.end()) {
            std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);

@ -78,21 +69,13 @@ public:
        delete this;
    }

    static void AddExt(std::string name, ext_factory factory) {
        auto extensionsHolder = GetExtensionsHolder();
        if (extensionsHolder != nullptr)
            extensionsHolder->list[name] = factory;
    }

    static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
        static std::shared_ptr<ExtensionsHolder> localHolder;
        if (localHolder == nullptr) {
            localHolder = std::make_shared<ExtensionsHolder>();
        }
        return localHolder;
    void AddExt(std::string name, ext_factory factory) {
        extensionsHolder->list[name] = factory;
    }

private:
    std::shared_ptr<ExtensionsHolder> extensionsHolder = std::make_shared<ExtensionsHolder>();

    template<class T>
    void collectTypes(char**& types, unsigned int& size, const std::map<std::string, T> &factories) {
        types = new char *[factories.size()];

@ -108,22 +91,6 @@ private:

IE_SUPPRESS_DEPRECATED_END

template<typename Ext>
class ExtRegisterBase {
public:
    explicit ExtRegisterBase(const std::string& type) {
        IE_SUPPRESS_DEPRECATED_START
        MKLDNNExtensions::AddExt(type,
            [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
                return new Ext(layer);
            });
        IE_SUPPRESS_DEPRECATED_END
    }
};

#define REG_FACTORY_FOR(__prim, __type) \
    static ExtRegisterBase<__prim> __reg__##__type(#__type)

} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
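A sketch of the ownership change in this header, under the reading that the old code relied on a shared function-local static holder filled by static registrar objects, while the new code gives every extension instance its own holder filled deterministically in the constructor. Types are simplified stand-ins, not the real API:

    #include <functional>
    #include <map>
    #include <memory>
    #include <string>

    struct Holder { std::map<std::string, std::function<void()>> list; };

    class ExtensionsSketch {
    public:
        ExtensionsSketch() {
            // Explicit, ordered registration in the constructor replaces
            // static-initializer side effects scattered across translation units.
            AddExt("ArgMax", [] {});
            AddExt("CumSum", [] {});
        }
        void AddExt(std::string name, std::function<void()> f) {
            holder->list[std::move(name)] = std::move(f);
        }
    private:
        // Per-instance state: two extension objects no longer share one map.
        std::shared_ptr<Holder> holder = std::make_shared<Holder>();
    };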
93
inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
Normal file
@ -0,0 +1,93 @@

// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#ifndef MKLDNN_EXTENSION_NODE
# warning "MKLDNN_EXTENSION_NODE is not defined"
# define MKLDNN_EXTENSION_NODE(__prim, __type)
#endif

MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox);
MKLDNN_EXTENSION_NODE(MathImpl, Abs);
MKLDNN_EXTENSION_NODE(MathImpl, Acos);
MKLDNN_EXTENSION_NODE(MathImpl, Acosh);
MKLDNN_EXTENSION_NODE(MathImpl, Asin);
MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
MKLDNN_EXTENSION_NODE(MathImpl, Atan);
MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
MKLDNN_EXTENSION_NODE(MathImpl, Cos);
MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
MKLDNN_EXTENSION_NODE(MathImpl, Erf);
MKLDNN_EXTENSION_NODE(MathImpl, Floor);
MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid);
MKLDNN_EXTENSION_NODE(MathImpl, Log);
MKLDNN_EXTENSION_NODE(MathImpl, Neg);
MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal);
MKLDNN_EXTENSION_NODE(MathImpl, Selu);
MKLDNN_EXTENSION_NODE(MathImpl, Sign);
MKLDNN_EXTENSION_NODE(MathImpl, Sin);
MKLDNN_EXTENSION_NODE(MathImpl, Sinh);
MKLDNN_EXTENSION_NODE(MathImpl, Softplus);
MKLDNN_EXTENSION_NODE(MathImpl, Softsign);
MKLDNN_EXTENSION_NODE(MathImpl, Tan);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
MKLDNN_EXTENSION_NODE(FillImpl, Fill);
MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace);
MKLDNN_EXTENSION_NODE(ScatterImpl, ScatterUpdate);
MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot);
MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast);
MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression);
MKLDNN_EXTENSION_NODE(TopKImpl, TopK);
MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels);
MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth);
MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile);
MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
MKLDNN_EXTENSION_NODE(PadImpl, Pad);
MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder);
MKLDNN_EXTENSION_NODE(GatherImpl, Gather);
MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal);
MKLDNN_EXTENSION_NODE(RangeImpl, Range);
MKLDNN_EXTENSION_NODE(SelectImpl, Select);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceAnd);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL1);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL2);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSum);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSumExp);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMax);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMean);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMin);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceOr);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceProd);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSum);
MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSumSquare);
MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree);
MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered);
MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -105,7 +104,7 @@ private:
    bool is_last_dim = false;
};

REG_FACTORY_FOR(ImplFactory<LogSoftmaxImpl>, LogSoftmax);
REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -272,29 +271,29 @@ private:
    float gamma = 0.0f;
};

REG_FACTORY_FOR(ImplFactory<MathImpl>, Abs);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Acos);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Acosh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Asin);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Asinh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Atan);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Atanh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Ceil);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Cos);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Cosh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Erf);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Floor);
REG_FACTORY_FOR(ImplFactory<MathImpl>, HardSigmoid);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Log);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Neg);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Reciprocal);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Selu);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sign);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sin);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Sinh);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Softplus);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Softsign);
REG_FACTORY_FOR(ImplFactory<MathImpl>, Tan);
REG_FACTORY_FOR(MathImpl, Abs);
REG_FACTORY_FOR(MathImpl, Acos);
REG_FACTORY_FOR(MathImpl, Acosh);
REG_FACTORY_FOR(MathImpl, Asin);
REG_FACTORY_FOR(MathImpl, Asinh);
REG_FACTORY_FOR(MathImpl, Atan);
REG_FACTORY_FOR(MathImpl, Atanh);
REG_FACTORY_FOR(MathImpl, Ceil);
REG_FACTORY_FOR(MathImpl, Cos);
REG_FACTORY_FOR(MathImpl, Cosh);
REG_FACTORY_FOR(MathImpl, Erf);
REG_FACTORY_FOR(MathImpl, Floor);
REG_FACTORY_FOR(MathImpl, HardSigmoid);
REG_FACTORY_FOR(MathImpl, Log);
REG_FACTORY_FOR(MathImpl, Neg);
REG_FACTORY_FOR(MathImpl, Reciprocal);
REG_FACTORY_FOR(MathImpl, Selu);
REG_FACTORY_FOR(MathImpl, Sign);
REG_FACTORY_FOR(MathImpl, Sin);
REG_FACTORY_FOR(MathImpl, Sinh);
REG_FACTORY_FOR(MathImpl, Softplus);
REG_FACTORY_FOR(MathImpl, Softsign);
REG_FACTORY_FOR(MathImpl, Tan);

} // namespace Cpu
} // namespace Extensions

@ -108,7 +108,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {

MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::primitive_desc &primitive_desc) const {
    memory::primitive_desc aprimitive_desc;
    mkldnn_primitive_desc_t bndesc;
    mkldnn_primitive_desc_t bndesc = nullptr;
    mkldnn_batch_normalization_desc_t *p;
    error::wrap_c_api(mkldnn_primitive_desc_query(
        primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),

@ -128,7 +128,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::pri

MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primitive_desc &primitive_desc) const {
    memory::primitive_desc aprimitive_desc;
    mkldnn_primitive_desc_t bndesc;
    mkldnn_primitive_desc_t bndesc = nullptr;
    mkldnn_batch_normalization_desc_t *p;
    error::wrap_c_api(mkldnn_primitive_desc_query(
        primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),

@ -148,7 +148,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primiti

MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const memory::primitive_desc &primitive_desc) const {
    memory::primitive_desc adesc;
    mkldnn_primitive_desc_t bndesc;
    mkldnn_primitive_desc_t bndesc = nullptr;
    const_mkldnn_primitive_desc_t const_bndesc =
        mkldnn_primitive_desc_query_pd(primitive_desc.get(),
                                       mkldnn::convert_to_c(weights_pd), 0);

@ -17,6 +17,16 @@

#include <mkldnn_types.h>
#include <mkldnn_extension_utils.h>
#include <ie_layers_internal.hpp>

// WA for xbyak.h
#ifdef _WIN32
# ifndef _WINSOCKAPI_
# define _WINSOCKAPI_
# endif
# ifndef _WINSOCK2API_
# define _WINSOCK2API_
# endif
#endif
#include "cpu_isa_traits.hpp"

using namespace mkldnn;

@ -279,8 +279,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
        getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc);
        createDescriptor({in_candidate}, {out_candidate});
    } else {
        inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
        outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
        inputDataType = (convLayer->input()->getPrecision() == Precision::BF16
            && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
        outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16
            && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
        eltwisePrecision = Precision::FP32;
        for (int i = 0; i < fusedWith.size(); i++) {
            auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());

@ -37,7 +37,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    precision = InferenceEngine::Precision::FP32;
    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);

    if (getParentEdges().size() != 1)
    if (getParentEdges().empty() || getParentEdges().size() > 3)
        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
    if (getChildEdges().empty())
        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();

@ -45,7 +45,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    auto * deconvLayer = dynamic_cast<DeconvolutionLayer*>(getCnnLayer().get());
    if (deconvLayer == nullptr)
        THROW_IE_EXCEPTION << "Cannot convert deconvolution layer.";
    if (deconvLayer->_weights == nullptr) {
    if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) {
        THROW_IE_EXCEPTION << "Weights are empty for layer: " << deconvLayer->name
                           << " used in MKLDNN node: " << getName() << "\n"
                           << "Use the second argument of InferenceEngine::Core::ReadNetwork"

@ -54,11 +54,22 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    withGroups = (deconvLayer->_group > 1);
    isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth &&
           deconvLayer->_group == deconvLayer->input()->getDims()[1];
    withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0);

    bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3;
    if (withBiases) {
        biases = deconvLayer->_biases;
        Blob::Ptr biases;

        if (getParentEdges().size() == 3) {
            auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer();
            if (biasLayer->type != "Const")
                THROW_IE_EXCEPTION << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases";
            biases = biasLayer->blobs["custom"];
        } else {
            biases = deconvLayer->_biases;
        }

        // WA: we add bias as depthwise post op
        setBiasAsPostOp();
        setBiasAsPostOp(biases);
    }

    /* Original layout format for deconv weights is iohw (from Caffe).

@ -83,7 +94,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
        weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]);
    }

    internalBlobs.push_back(createInternalBlob(weightDims, true));
    if (getParentEdges().size() == 1)
        internalBlobs.push_back(createInternalBlob(weightDims, true));

    invertVectorCopyUtoI(deconvLayer->_stride, stride);
    for (int i = 1; i <= deconvLayer->_dilation.size(); i++) {

@ -113,7 +125,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
    }
}

void MKLDNNDeconvolutionNode::setBiasAsPostOp() {
void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) {
    mkldnn::post_ops ops;
    MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biases->size(), 16))});

@ -157,7 +169,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() {

    prim.reset(new convolution_backward_data(prim_desc,
        getParentEdgeAt(0)->getMemory().GetPrimitive(),
        internalBlobMemory[0]->GetPrimitive(),
        getWeights(),
        getChildEdgeAt(0)->getMemory().GetPrimitive()));
}

@ -197,15 +209,32 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<InferenceEngine
}

MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
    InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
    if (desc.getLayout() == InferenceEngine::Layout::ANY)
    InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(idx - 1).desc())
                                               : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());

    if (desc.getLayout() == InferenceEngine::Layout::ANY) {
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getLayout()));
    else
        return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                            getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                            desc.getBlockingDesc()));
    } else {
        if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(),
                                                                                       desc.getBlockingDesc().getOrder().end()) + 1) {
            auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector();
            auto new_dims = weightsDims.ToSizeVector();

            auto td = InferenceEngine::TensorDesc(desc.getPrecision(),
                                                  new_dims,
                                                  desc.getBlockingDesc());
            if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) {
                td.setLayout(BLOCKED);
            }
            return MKLDNNMemoryDesc(td);
        } else {
            return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
                                                                getParentEdgeAt(idx)->getDims().ToSizeVector(),
                                                                desc.getBlockingDesc()));
        }
    }
}

MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {

@ -219,4 +248,9 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_i
                                                        getChildEdgeAt(idx)->getDims().ToSizeVector(),
                                                        desc.getBlockingDesc()));
}

const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const {
    return getParentEdges().size() > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive();
}

REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
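For reference, a hedged sketch of the rnd_up helper used above when sizing the depthwise post-op blob. Its real definition lives elsewhere in the plugin; rounding up to the next multiple of the factor is the assumed semantics.

    #include <cstddef>

    // Assumed semantics: round value up to the next multiple of factor.
    constexpr size_t rnd_up(size_t value, size_t factor) {
        return (value + factor - 1) / factor * factor;
    }

    static_assert(rnd_up(20, 16) == 32, "20 rounds up to the next multiple of 16");
    static_assert(rnd_up(32, 16) == 32, "exact multiples are unchanged");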
@ -27,11 +27,14 @@ public:
        return false;
    }

    size_t descInputNumbers(MKLDNNDescriptor desc) override {
        return static_cast<size_t>(getParentEdges().size());
    }

    MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
    MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;

private:
    bool withBiases = false;
    bool withGroups = false;
    bool isDW = false;
    size_t groupNum = 1;

@ -40,13 +43,14 @@ private:
    std::vector<ptrdiff_t> dilation;
    std::vector<ptrdiff_t> paddingR;
    MKLDNNDims weightsDims;
    InferenceEngine::Blob::Ptr biases;
    std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
    std::vector<std::shared_ptr<mkldnn::convolution_backward_data::desc>> descs_bwd;

    mkldnn::primitive_attr attr;
    std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
    void setBiasAsPostOp();
    void setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases);

    const mkldnn::memory& getWeights() const;
};

} // namespace MKLDNNPlugin

@ -715,7 +715,9 @@ void MKLDNNNormalizeNode::initSupportedPrimitiveDescriptors() {
    setPostOps(attr, true);

    Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision();
    inputPrecision = inputPrecision == Precision::BF16 ? Precision(Precision::FP32) : inputPrecision;
    Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision();
    outputPrecision = outputPrecision == Precision::BF16 ? Precision(Precision::FP32) : outputPrecision;

    if (!fusedWith.empty()) {
        auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer();

@ -119,22 +119,38 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe
    // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats.
    // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw)
    // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout.
    if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() && MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) {
    if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() &&
            MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) {
        try {
            mkldnn::memory::dims newDims = dst_blocked->GetDims();
            mkldnn::memory::format newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw :
                                               src_blocked->GetDims().size() == 5 ? memory::goidhw :
                                               src_blocked->GetFormat();
            mkldnn::memory::format newFormat;
            newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw :
                        src_blocked->GetDims().size() == 5 ? memory::goidhw :
                        src_blocked->GetFormat();

            auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat);
            src_blocked->Create(newDesc, srcPtr, false);

            createReorder();
        } catch (const std::exception&) {
        } catch (...) {
            THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
        }
    // MKLDNN doesn't support direct reorders between planar data formats in case they have different rank but the same number of elements.
    // Code block below detects these cases and substitutes src dims with dst ones.
    } else if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() &&
               MKLDNNMemory::GetPlainFormat(dst_blocked->GetDims()) == dst_blocked->GetFormat() &&
               src_blocked->GetElementsCount() == dst_blocked->GetElementsCount()) {
        try {
            auto newDesc = mkldnn::memory::desc(dst_blocked->GetDims(), src_blocked->GetDataType(), dst_blocked->GetFormat());
            src_blocked->Create(newDesc, srcPtr, false);

            createReorder();
        } catch (...) {
            THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
        }
    } else {
        THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case";
    }
    // TODO: shouldn't we throw exception in this case?
    }
}

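The reinterpretation above is legal because a planar 4-D weights tensor of shape [G*O, I, H, W] and its grouped 5-D view [G, O, I, H, W] address the same linear memory. A small index-math check of that claim (plain C++, no mkldnn):

    #include <cassert>
    #include <cstddef>

    int main() {
        const size_t G = 2, O = 3, I = 4, H = 5, W = 6;
        for (size_t g = 0; g < G; ++g)
            for (size_t o = 0; o < O; ++o)
                for (size_t i = 0; i < I; ++i)
                    for (size_t h = 0; h < H; ++h)
                        for (size_t w = 0; w < W; ++w) {
                            const size_t oc = g * O + o;  // flattened output channel of the 4-D view
                            const size_t plain4d   = ((oc * I + i) * H + h) * W + w;             // oihw with O' = G*O
                            const size_t grouped5d = ((((g * O) + o) * I + i) * H + h) * W + w;  // goihw
                            assert(plain4d == grouped5d);  // same linear offset, element for element
                        }
        return 0;
    }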
@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -242,7 +241,7 @@ private:
    bool sort_result_descending = true;
};

REG_FACTORY_FOR(ImplFactory<NonMaxSuppressionImpl>, NonMaxSuppression);
REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include "ie_parallel.hpp"

@ -103,7 +102,7 @@ private:
    Precision input_precision;
};

REG_FACTORY_FOR(ImplFactory<OneHotImpl>, OneHot);
REG_FACTORY_FOR(OneHotImpl, OneHot);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -251,7 +250,7 @@ void PadImpl::pad_symmetric(const float *src_data, float* dst_data) {
    });
}

REG_FACTORY_FOR(ImplFactory<PadImpl>, Pad);
REG_FACTORY_FOR(PadImpl, Pad);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -57,7 +56,7 @@ private:
    std::vector<int> shift_;
};

REG_FACTORY_FOR(ImplFactory<PowerFileImpl>, PowerFile);
REG_FACTORY_FOR(PowerFileImpl, PowerFile);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <vector>

@ -338,7 +337,7 @@ private:
    int _num_priors = 0;
};

REG_FACTORY_FOR(ImplFactory<PriorBoxImpl>, PriorBox);
REG_FACTORY_FOR(PriorBoxImpl, PriorBox);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <vector>

@ -117,7 +116,7 @@ private:
    float offset_;
};

REG_FACTORY_FOR(ImplFactory<PriorBoxClusteredImpl>, PriorBoxClustered);
REG_FACTORY_FOR(PriorBoxClusteredImpl, PriorBoxClustered);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <cassert>

@ -90,7 +89,7 @@ private:
};


REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronPriorGridGeneratorImpl>, ExperimentalDetectronPriorGridGenerator);
REG_FACTORY_FOR(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include "proposal_imp.hpp"

@ -179,7 +178,7 @@ private:
    bool store_prob;  // store blob with proposal probabilities
};

REG_FACTORY_FOR(ImplFactory<ProposalImpl>, Proposal);
REG_FACTORY_FOR(ProposalImpl, Proposal);

} // namespace Cpu
} // namespace Extensions

@ -4,6 +4,7 @@

#include "proposal_imp.hpp"

#include <cstring>
#include <cmath>
#include <string>
#include <vector>

@ -137,7 +138,7 @@ static void nms_cpu(const int num_boxes, int is_dead[],
    const float* x1 = boxes + 2 * num_proposals;
    const float* y1 = boxes + 3 * num_proposals;

    memset(is_dead, 0, num_boxes * sizeof(int));
    std::memset(is_dead, 0, num_boxes * sizeof(int));

#if defined(HAVE_AVX2)
    __m256 vc_fone = _mm256_set1_ps(coordinates_offset);

@ -2,9 +2,9 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cstring>
#include <cassert>
#include <cmath>
#include <string>

@ -130,7 +130,7 @@ void nms_cpu(const int num_boxes, int is_dead[],
    const float* x1 = boxes + 2 * num_proposals;
    const float* y1 = boxes + 3 * num_proposals;

    memset(is_dead, 0, num_boxes * sizeof(int));
    std::memset(is_dead, 0, num_boxes * sizeof(int));

#if defined(HAVE_AVX2)
    __m256 vc_fone = _mm256_set1_ps(coordinates_offset);

@ -410,7 +410,7 @@ private:
    std::vector<int> roi_indices_;
};

REG_FACTORY_FOR(ImplFactory<ONNXCustomProposalImpl>, ExperimentalDetectronGenerateProposalsSingleImage);
REG_FACTORY_FOR(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <cmath>
#include <vector>

@ -303,7 +302,7 @@ private:
    float trans_std_;
};

REG_FACTORY_FOR(ImplFactory<PSROIPoolingImpl>, PSROIPooling);
REG_FACTORY_FOR(PSROIPoolingImpl, PSROIPooling);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -125,7 +124,7 @@ StatusCode RangeImpl::range(data_t start, data_t limit, data_t delta, Blob::Ptr
    });
    return OK;
}
REG_FACTORY_FOR(ImplFactory<RangeImpl>, Range);
REG_FACTORY_FOR(RangeImpl, Range);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -388,18 +387,18 @@ void ReduceImpl::reduce(
    }
}

REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceAnd);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceL1);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceL2);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceLogSum);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceLogSumExp);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMax);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMean);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceMin);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceOr);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceProd);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceSum);
REG_FACTORY_FOR(ImplFactory<ReduceImpl>, ReduceSumSquare);
REG_FACTORY_FOR(ReduceImpl, ReduceAnd);
REG_FACTORY_FOR(ReduceImpl, ReduceL1);
REG_FACTORY_FOR(ReduceImpl, ReduceL2);
REG_FACTORY_FOR(ReduceImpl, ReduceLogSum);
REG_FACTORY_FOR(ReduceImpl, ReduceLogSumExp);
REG_FACTORY_FOR(ReduceImpl, ReduceMax);
REG_FACTORY_FOR(ReduceImpl, ReduceMean);
REG_FACTORY_FOR(ReduceImpl, ReduceMin);
REG_FACTORY_FOR(ReduceImpl, ReduceOr);
REG_FACTORY_FOR(ReduceImpl, ReduceProd);
REG_FACTORY_FOR(ReduceImpl, ReduceSum);
REG_FACTORY_FOR(ReduceImpl, ReduceSumSquare);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include "common/defs.h"
#include "common/softmax.h"

@ -304,7 +303,7 @@ private:
    }
};

REG_FACTORY_FOR(ImplFactory<RegionYoloImpl>, RegionYolo);
REG_FACTORY_FOR(RegionYoloImpl, RegionYolo);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <vector>

@ -64,7 +63,7 @@ private:
    int stride;
};

REG_FACTORY_FOR(ImplFactory<ReorgYoloImpl>, ReorgYolo);
REG_FACTORY_FOR(ReorgYoloImpl, ReorgYolo);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -172,7 +171,7 @@ private:
    size_t work_amount_dst;
};

REG_FACTORY_FOR(ImplFactory<ReverseSequenceImpl>, ReverseSequence);
REG_FACTORY_FOR(ReverseSequenceImpl, ReverseSequence);

} // namespace Cpu
} // namespace Extensions

@ -7,7 +7,6 @@

// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
//

#include "list.hpp"
#include "base.hpp"
#include <cassert>
#include <cmath>

@ -406,7 +405,7 @@ private:
    int nw = 0;
};

REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronROIFeatureExtractorImpl>, ExperimentalDetectronROIFeatureExtractor);
REG_FACTORY_FOR(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -182,7 +181,7 @@ private:
    const size_t SCATTER_UPDATES = 2;
};

REG_FACTORY_FOR(ImplFactory<ScatterImpl>, ScatterUpdate);
REG_FACTORY_FOR(ScatterImpl, ScatterUpdate);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <string>

@ -221,7 +220,7 @@ private:
    }
};

REG_FACTORY_FOR(ImplFactory<SelectImpl>, Select);
REG_FACTORY_FOR(SelectImpl, Select);
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -142,7 +141,7 @@ private:
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<ShuffleChannelsImpl>, ShuffleChannels);
REG_FACTORY_FOR(ShuffleChannelsImpl, ShuffleChannels);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -332,7 +331,7 @@ private:
    std::vector<simpler_nms_anchor> anchors_;
};

REG_FACTORY_FOR(ImplFactory<SimplerNMSImpl>, SimplerNMS);
REG_FACTORY_FOR(SimplerNMSImpl, SimplerNMS);

} // namespace Cpu
} // namespace Extensions

@ -4,7 +4,6 @@

#include "base.hpp"
#include "ie_parallel.hpp"
#include "list.hpp"

#include <cmath>
#include <string>

@ -222,7 +221,7 @@ private:
    std::vector<size_t> _pads_end;
};

REG_FACTORY_FOR(ImplFactory<SpaceToBatchImpl>, SpaceToBatch);
REG_FACTORY_FOR(SpaceToBatchImpl, SpaceToBatch);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -119,7 +118,7 @@ private:
    size_t ownStrides[CNTR_SIZE];
};

REG_FACTORY_FOR(ImplFactory<SpaceToDepthImpl>, SpaceToDepth);
REG_FACTORY_FOR(SpaceToDepthImpl, SpaceToDepth);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -225,7 +224,7 @@ private:
    size_t outMaxNumValues = 0;
};

REG_FACTORY_FOR(ImplFactory<SparseFillEmptyRowsImpl>, SparseFillEmptyRows);
REG_FACTORY_FOR(SparseFillEmptyRowsImpl, SparseFillEmptyRows);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -202,9 +201,9 @@ private:
    ReducedOp reduction_op;
};

REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentMean);
REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentSqrtN);
REG_FACTORY_FOR(ImplFactory<SparseSegmentReduceImpl>, SparseSegmentSum);
REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentMean);
REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSqrtN);
REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSum);

} // namespace Cpu
} // namespace Extensions

@ -2,7 +2,6 @@

// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>

@ -152,7 +151,7 @@ private:
    bool with_default_value = false;
};

REG_FACTORY_FOR(ImplFactory<SparseToDenseImpl>, SparseToDense);
REG_FACTORY_FOR(SparseToDenseImpl, SparseToDense);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -226,7 +225,7 @@ private:
    Precision input_default_value_precision;
};

-REG_FACTORY_FOR(ImplFactory<ExperimentalSparseWeightedReduceImpl>, ExperimentalSparseWeightedSum);
+REG_FACTORY_FOR(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -57,7 +56,7 @@ public:
    }
};

-REG_FACTORY_FOR(ImplFactory<SqueezeImpl>, Squeeze);
+REG_FACTORY_FOR(SqueezeImpl, Squeeze);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -376,7 +375,7 @@ void StridedSliceImpl::strided_slice_p(const float *src_data, float* dst_data) {
    });
}

-REG_FACTORY_FOR(ImplFactory<StridedSliceImpl>, StridedSlice);
+REG_FACTORY_FOR(StridedSliceImpl, StridedSlice);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -578,7 +577,7 @@ private:
    }
};

-REG_FACTORY_FOR(ImplFactory<TopKImpl>, TopK);
+REG_FACTORY_FOR(TopKImpl, TopK);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"
#include <algorithm>
#include <cassert>
@@ -71,7 +70,7 @@ private:
    int max_rois_num_;
};

-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronTopKROIsImpl>, ExperimentalDetectronTopKROIs);
+REG_FACTORY_FOR(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -199,7 +198,7 @@ private:
    size_t num_elements = 0;
};

-REG_FACTORY_FOR(ImplFactory<UniqueImpl>, Unique);
+REG_FACTORY_FOR(UniqueImpl, Unique);

} // namespace Cpu
} // namespace Extensions

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "list.hpp"
#include "base.hpp"

#include <cmath>
@@ -52,7 +51,7 @@ public:
    }
};

-REG_FACTORY_FOR(ImplFactory<UnsqueezeImpl>, Unsqueeze);
+REG_FACTORY_FOR(UnsqueezeImpl, Unsqueeze);

} // namespace Cpu
} // namespace Extensions

@@ -38,9 +38,9 @@ class INFERENCE_ENGINE_API_CLASS(ConvFusion);
class ngraph::pass::ConvFusion: public ngraph::pass::GraphRewrite {
public:
    ConvFusion() : GraphRewrite() {
-        fuse_convolution_with<op::ConvolutionIE, op::v1::Multiply>();
-        fuse_convolution_with<op::ConvolutionIE, op::v1::Add>();
-        fuse_convolution_with<op::DeconvolutionIE, op::v1::Add>();
+        fuse_convolution_with<op::ConvolutionIE, opset1::Multiply>();
+        fuse_convolution_with<op::ConvolutionIE, opset1::Add>();
+        fuse_convolution_with<op::DeconvolutionIE, opset1::Add>();
    }

private:
@@ -53,8 +53,8 @@ private:

template <class Conv, class Eltwise>
void ngraph::pass::ConvFusion::fuse_convolution_with() {
-    static_assert(std::is_same<Eltwise, ngraph::op::v1::Multiply>() || std::is_same<Eltwise, ngraph::op::v1::Add>(),
-                  "This transformation works only with ngraph::op::v1::Add and ngraph::op::v1::Multiply");
+    static_assert(std::is_same<Eltwise, ngraph::opset1::Multiply>() || std::is_same<Eltwise, ngraph::opset1::Add>(),
+                  "This transformation works only with ngraph::opset1::Add and ngraph::opset1::Multiply");

    static_assert(std::is_same<Conv, ngraph::op::ConvolutionIE>() || std::is_same<Conv, ngraph::op::DeconvolutionIE>(),
                  "This transformation works only with ngraph::op::ConvolutionIE and ngraph::op::DeconvolutionIE");
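Note: the rewritten static_asserts are type-for-type equivalent to the old ones, because `ngraph::opset1` re-exports the versioned op classes through using-declarations. A one-line check (assuming the standard `opset1.hpp` header of this ngraph era) makes that concrete:

```cpp
#include <type_traits>
#include <ngraph/opsets/opset1.hpp>

// opset1::Add is a using-declaration for op::v1::Add, so the two names denote
// the identical type; likewise for Multiply.
static_assert(std::is_same<ngraph::opset1::Add, ngraph::op::v1::Add>::value,
              "opset1::Add aliases op::v1::Add");
static_assert(std::is_same<ngraph::opset1::Multiply, ngraph::op::v1::Multiply>::value,
              "opset1::Multiply aliases op::v1::Multiply");
```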
@@ -85,56 +85,63 @@ ngraph::graph_rewrite_callback ngraph::pass::ConvFusion::get_callback() {
        }

        // TODO: check that constant can be scalar and do not match [1, C, 1, 1] layout
-        auto constant_shape = m_const->get_shape();
-        auto output_shape = m_conv->get_shape();
-        size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies<size_t>());
-        if (constant_size != output_shape[1]) {
+        const auto constant_shape = m_const->get_shape();
+        const auto output_pshape = m_conv->get_output_partial_shape(0);
+
+        if (output_pshape.rank().is_dynamic() || output_pshape[1].is_dynamic()) {
            return false;
        }

-        std::shared_ptr<ngraph::Node> constant(m_const);
+        const auto channel_dim = output_pshape[1].get_length();
+
+        size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies<size_t>());
+        if (constant_size != channel_dim) {
+            return false;
+        }
+
+        Output<Node> constant(m_const);

        if (constant_shape.size() > 1) {
-            constant = std::make_shared<op::v1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {output_shape[1]}), true);
+            constant = std::make_shared<opset1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {channel_dim}), true);
        }

        if (m_conv->output(0).get_target_inputs().size() != 1) {
            return false;
        }

-        std::shared_ptr<Node> new_conv, new_weights, new_bias;
-        if (std::dynamic_pointer_cast<op::v1::Add>(eltwise)) {
+        Output<Node> new_conv, new_weights, new_bias;
+        if (std::dynamic_pointer_cast<opset1::Add>(eltwise)) {
            // Fuse: ConvolutionIE/DeconvolutionIE->Add
            if (m_conv->inputs().size() == 2) {
                new_bias = constant;
            } else {
-                new_bias = std::make_shared<op::v1::Add>(constant, m_conv->input_value(2));
+                new_bias = std::make_shared<opset1::Add>(constant, m_conv->input_value(2));
            }
            new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), m_conv->input_value(1), new_bias});
-        } else if (std::is_same<Conv, op::ConvolutionIE>() && std::dynamic_pointer_cast<op::v1::Multiply>(eltwise)) {
+        } else if (std::is_same<Conv, op::ConvolutionIE>() && std::dynamic_pointer_cast<opset1::Multiply>(eltwise)) {
            // Fuse: ConvolutionIE->Mul
            auto weights_shape = m_conv->input(1).get_shape();

            Shape const_shape(weights_shape.size(), 1);
            const_shape[0] = weights_shape[0];

-            auto const_reshape = std::make_shared<op::v1::Reshape>(constant,
+            auto const_reshape = std::make_shared<opset1::Reshape>(constant,
                    op::Constant::create(element::i64, Shape{const_shape.size()}, const_shape), true);
-            new_weights = std::make_shared<op::v1::Multiply> (m_conv->input_value(1), const_reshape);
+            new_weights = std::make_shared<opset1::Multiply> (m_conv->input_value(1), const_reshape);
            if (m_conv->inputs().size() == 2) {
                new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights});
            } else {
-                auto bias_reshape = std::make_shared<op::v1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true);
-                new_bias = std::make_shared<op::v1::Multiply>(bias_reshape, constant);
+                auto bias_reshape = std::make_shared<opset1::Reshape>(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true);
+                new_bias = std::make_shared<opset1::Multiply>(bias_reshape, constant);
                new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights, new_bias});
            }
        } else {
            return false;
        }

-        ngraph::copy_runtime_info({m_conv, eltwise}, new_conv);
-        new_conv->set_friendly_name(m.get_match_root()->get_friendly_name());
-        ngraph::replace_node(m.get_match_root(), new_conv);
+        ngraph::copy_runtime_info({m_conv, eltwise}, new_conv.get_node_shared_ptr());
+        new_conv.get_node_shared_ptr()->set_friendly_name(m.get_match_root()->get_friendly_name());
+        ngraph::replace_node(m.get_match_root(), new_conv.get_node_shared_ptr());
        return true;
    };
    return callback;

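Note: the reworked callback swaps `get_shape()` for `get_output_partial_shape(0)` so the fusion can return `false` on dynamically shaped outputs instead of throwing. A small standalone sketch of that guard, using only the PartialShape calls that appear in the hunk:

```cpp
#include <cstdint>
#include <memory>
#include <ngraph/node.hpp>

// Returns true and fills channel_dim only when both the rank and the channel
// dimension of the node's first output are static; mirrors the early-return
// guard the updated callback performs before validating the constant size.
bool try_get_channel_dim(const std::shared_ptr<ngraph::Node>& node, int64_t& channel_dim) {
    const auto pshape = node->get_output_partial_shape(0);
    if (pshape.rank().is_dynamic() || pshape[1].is_dynamic())
        return false;  // shape not known at transformation time
    channel_dim = pshape[1].get_length();
    return true;
}
```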
@@ -17,5 +17,6 @@
NGRAPH_PASS(ConvertBroadcast3, ::ngraph::pass)
NGRAPH_PASS(ConvertNMS3, ::ngraph::pass)
NGRAPH_PASS(ConvertShapeOf3, ::ngraph::pass)
+NGRAPH_PASS(ConvertShuffleChannels3, ::ngraph::pass)
NGRAPH_PASS(ConvertTopK3, ::ngraph::pass)

@@ -0,0 +1,30 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <vector>
+#include <memory>
+
+#include <ie_api.h>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class INFERENCE_ENGINE_API_CLASS(ConvertShuffleChannels3);
+
+}  // namespace pass
+}  // namespace ngraph
+
+class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite {
+public:
+    ConvertShuffleChannels3() : GraphRewrite() {
+        convert_shuffle_channels3();
+    }
+
+private:
+    void convert_shuffle_channels3();
+};
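Note: the new header only declares the pass; it is applied through the `NGRAPH_PASS` table extended earlier in this diff. For standalone use, pass-manager wiring along these lines would run it — the include path below is an assumption for illustration, not taken from the commit:

```cpp
#include <memory>
#include <ngraph/function.hpp>
#include <ngraph/pass/manager.hpp>
// Assumed location of the header introduced above:
#include <transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp>

// Runs only the ShuffleChannels-v3 conversion on a function.
void convert_shuffle_channels(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::pass::ConvertShuffleChannels3>();
    manager.run_passes(f);
}
```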
@@ -54,43 +54,11 @@ op::ConvolutionIE::ConvolutionIE(const Output<Node>& data_batch,
}

void op::ConvolutionIE::validate_and_infer_types() {
-    const PartialShape& data_batch_pshape = get_input_partial_shape(0);
+    PartialShape data_batch_shape = get_input_partial_shape(0);
    element::Type data_batch_et = get_input_element_type(0);
-    const PartialShape& filters_pshape = get_input_partial_shape(1);
+    PartialShape filters_shape = get_input_partial_shape(1);
    element::Type filters_et = get_input_element_type(1);

-    PartialShape result_shape{PartialShape::dynamic()};
-
-    // we need to adjust filters_shape to reuse helpers for normal convolution
-    if (filters_pshape.is_static() && data_batch_pshape.is_static()) {
-        auto filters_shape = filters_pshape.to_shape();
-        auto groups = m_group;
-        auto data_batch_shape = data_batch_pshape.to_shape();
-        data_batch_shape[1] /= groups;
-
-        if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) {
-            m_pads_begin.clear();
-            m_pads_end.clear();
-            infer_auto_padding(
-                data_batch_shape,
-                Shape(filters_shape.begin() + 2, filters_shape.end()),  // Remove {O,I}
-                m_strides,
-                m_dilations,
-                m_auto_pad,
-                m_pads_end,
-                m_pads_begin);
-        }
-
-        result_shape =
-            infer_convolution_forward(this,
-                                      data_batch_shape,
-                                      Strides(m_strides.size(), 1),  // dummy data dilations
-                                      m_pads_begin,
-                                      m_pads_end,
-                                      filters_shape,
-                                      m_strides,
-                                      m_dilations);
-    }
    element::Type result_et;

    NODE_VALIDATION_CHECK(
@@ -102,6 +70,45 @@ void op::ConvolutionIE::validate_and_infer_types() {
        filters_et,
        ").");

+    PartialShape result_shape{PartialShape::dynamic()};
+
+    // In case if number of groups greater than 1 and channel dimension is dynamic we can't calculate output shape
+    if (m_group > 1) {
+        if (data_batch_shape.rank().is_dynamic() || data_batch_shape[1].is_dynamic()) {
+            set_output_type(0, result_et, result_shape);
+            return;
+        } else {
+            // Update channel dimension according to groups count
+            data_batch_shape[1] = data_batch_shape[1].get_length() / m_group;
+        }
+    }
+
+    // we need to adjust filters_shape to reuse helpers for normal convolution
+    if (filters_shape.is_static() && data_batch_shape.is_static()) {
+        if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) {
+            m_pads_begin.clear();
+            m_pads_end.clear();
+            auto filter_shape = filters_shape.to_shape();
+            filter_shape.erase(filter_shape.begin(), filter_shape.begin() + 2);  // Remove {O,I}
+            infer_auto_padding(data_batch_shape.to_shape(),
+                               filter_shape,
+                               m_strides,
+                               m_dilations,
+                               m_auto_pad,
+                               m_pads_end,
+                               m_pads_begin);
+        }
+    }
+
+    result_shape = infer_convolution_forward(this,
+                                             data_batch_shape,
+                                             Strides(m_strides.size(), 1),  // dummy data dilations
+                                             m_pads_begin,
+                                             m_pads_end,
+                                             filters_shape,
+                                             m_strides,
+                                             m_dilations);
+
    set_output_type(0, result_et, result_shape);
}

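Note: the new group handling divides the channel dimension by the group count before reusing the single-group shape helper, and falls back to a fully dynamic output when that dimension is unknown. A toy rendition of just the channel adjustment, assuming only the public PartialShape API:

```cpp
#include <cstdint>
#include <ngraph/partial_shape.hpp>

// For a grouped convolution each group convolves over C / group input
// channels, so the shape handed to the ordinary convolution helper gets a
// reduced channel dim, e.g. {1, 8, 32, 32} with group = 2 -> {1, 4, 32, 32}.
ngraph::PartialShape grouped_input_shape(ngraph::PartialShape shape, size_t group) {
    if (group > 1 && shape.rank().is_static() && shape[1].is_static())
        shape[1] = shape[1].get_length() / static_cast<int64_t>(group);
    return shape;  // left untouched when rank or channel dim is dynamic
}
```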
@@ -47,10 +47,16 @@ void ngraph::pass::ConvertInterpolateToInterpOrResample::convert_interpolate_to_
    interpolate_attrs.pads_end = std::vector<size_t>{0};

    std::vector<size_t> useless_axes;
-    for (const auto & axis : interpolate_axes)
-        if (input_shape[axis] == out_spatial_shape[axis] && axis < 2)
-            // keeping only those not spatial dimensions that are going to be changed
-            useless_axes.push_back(axis);
+    size_t axis_idx = 0;
+    for (auto axis = 0; axis < input_shape.size(); ++axis) {
+        if (interpolate_axes.count(axis)) {
+            if (input_shape[axis] == out_spatial_shape[axis_idx] && axis < 2)
+                // keeping only those not spatial dimensions that are going to be changed
+                useless_axes.push_back(axis);
+            ++axis_idx;
+        }
+    }

    std::reverse(useless_axes.begin(), useless_axes.end());
    for (const auto & axis : useless_axes) {
        interpolate_axes.erase(axis);
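Note: this hunk fixes an indexing mismatch — `out_spatial_shape` holds one entry per axis listed in `interpolate_axes`, so it must be indexed by the axis's position in that list, not by the axis value itself. A standalone rendition of the corrected loop (container types are assumptions matching the snippet's usage):

```cpp
#include <cstddef>
#include <set>
#include <vector>

// Returns the axes whose extent actually changes. Before the fix the code
// read out_spatial_shape[axis] (e.g. index 3 into a two-element vector for
// axes {2, 3}); after it, the k-th listed axis maps to entry k.
std::vector<size_t> changed_axes(const std::vector<size_t>& input_shape,
                                 const std::set<size_t>& interpolate_axes,
                                 const std::vector<size_t>& out_spatial_shape) {
    std::vector<size_t> changed;
    size_t axis_idx = 0;  // position of the current axis within out_spatial_shape
    for (size_t axis = 0; axis < input_shape.size(); ++axis) {
        if (interpolate_axes.count(axis)) {
            if (input_shape[axis] != out_spatial_shape[axis_idx])
                changed.push_back(axis);
            ++axis_idx;
        }
    }
    return changed;
}
```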
Some files were not shown because too many files have changed in this diff.