diff --git a/.gitignore b/.gitignore
index 1f7c2fbdb8f..1c5368e74d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ build/
.gdb_history
.vimspector.json
doc/
+!ngraph/doc
docs/build_documentation/work_dir/
inference-engine/plugins/
inference-engine/temp
@@ -55,4 +56,16 @@ __pycache__
/model-optimizer/!CMakeLists.txt
/model-optimizer/*.mapping
/model-optimizer/*.dat
-/model-optimizer/*.svg
\ No newline at end of file
+/model-optimizer/*.svg
+
+# ngraph
+ngraph/src/CPackConfig.cmake
+ngraph/src/CPackSourceConfig.cmake
+ngraph/src/VERSION
+ngraph/src/gtest/
+ngraph/src/json/
+ngraph/src/ngraphConfig.cmake
+ngraph/src/ngraphConfigVersion.cmake
+ngraph/src/protobuf/
+ngraph/src/src/
+ngraph/src/test/
diff --git a/cmake/developer_package.cmake b/cmake/developer_package.cmake
index 1deb23ff45b..e0027fdcc53 100644
--- a/cmake/developer_package.cmake
+++ b/cmake/developer_package.cmake
@@ -64,12 +64,11 @@ endmacro()
macro(ie_cpack)
set(CPACK_GENERATOR "TGZ")
+ string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
if(WIN32)
set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE})
- string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
else()
set(CPACK_PACKAGE_NAME inference-engine)
- string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}")
endif()
set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF)
set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)
diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt
index 79894a74a6c..2aa19ac73bb 100644
--- a/inference-engine/CMakeLists.txt
+++ b/inference-engine/CMakeLists.txt
@@ -159,6 +159,17 @@ if(ENABLE_PYTHON)
COMPONENT python_samples)
endif()
+# install speech demo files
+
+if(SPEECH_LIBS_AND_DEMOS)
+ ie_cpack_add_component(speech_demo_files REQUIRED)
+
+ install(DIRECTORY ${TEMP}/deployment_tools
+ ${TEMP}/data_processing
+ DESTINATION .
+ COMPONENT speech_demo_files)
+endif()
+
#
# Developer package
#
diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt
index 519a5c64338..977260b9dff 100644
--- a/inference-engine/ie_bridges/python/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/CMakeLists.txt
@@ -57,7 +57,7 @@ add_subdirectory (src/openvino/inference_engine)
# Check Cython version
if("${CYTHON_VERSION}" VERSION_LESS "0.29")
- message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}")
+ message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}")
else()
message(STATUS "Found Cython version ${CYTHON_VERSION}")
endif()
diff --git a/inference-engine/ie_bridges/python/cmake/FindCython.cmake b/inference-engine/ie_bridges/python/cmake/FindCython.cmake
index f960fe20998..5ac7c7049f7 100644
--- a/inference-engine/ie_bridges/python/cmake/FindCython.cmake
+++ b/inference-engine/ie_bridges/python/cmake/FindCython.cmake
@@ -58,6 +58,6 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE )
# Find Cython version
execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET)
-string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
+string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}")
mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION )
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
index 2ad199f8b35..aa2a30c0555 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
@@ -23,6 +23,7 @@ foreach(PYX_FILE ${OTHER_SOURCES})
get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE)
set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON)
cython_add_module(${PYX_NAME} ${PYX_FILE})
+ add_dependencies(${TARGET_NAME} ${PYX_NAME})
target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
endforeach()
diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py
index 0dd7ef2a33c..4de736279d5 100644
--- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py
+++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py
@@ -3,6 +3,7 @@ import os
import pytest
import warnings
import threading
+from datetime import datetime
from openvino.inference_engine import ie_api as ie
from conftest import model_path, image_path
@@ -195,11 +196,25 @@ def test_async_infer_wait_finish(device):
def test_async_infer_wait_time(device):
ie_core = ie.IECore()
net = ie_core.read_network(test_net_xml, test_net_bin)
- exec_net = ie_core.load_network(net, device, num_requests=1)
+ exec_net = ie_core.load_network(net, device, num_requests=2)
img = read_image()
request = exec_net.requests[0]
request.async_infer({'data': img})
- request.wait(100)
+ start_time = datetime.utcnow()
+ status = request.wait(ie.WaitMode.RESULT_READY)
+ assert status == ie.StatusCode.OK
+ time_delta = datetime.utcnow() - start_time
+ latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000)
+ timeout = max(100, latency_ms)
+ request = exec_net.requests[1]
+ request.async_infer({'data': img})
+ max_repeat = 10
+ status = ie.StatusCode.REQUEST_BUSY
+ i = 0
+ while i < max_repeat and status != ie.StatusCode.OK:
+ status = request.wait(timeout)
+ i += 1
+ assert status == ie.StatusCode.OK
res = request.output_blobs['fc_out'].buffer
assert np.argmax(res) == 2
del exec_net
diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp
index 9affebbd8bf..7ab2469e45c 100644
--- a/inference-engine/samples/benchmark_app/benchmark_app.hpp
+++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -100,6 +100,9 @@ static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file
static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\""
" in case of one input size.";
+/// @brief message for GNA quantization bits
+static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
+
/// @brief Define flag for showing help message
DEFINE_bool(h, false, help_message);
@@ -184,6 +187,9 @@ DEFINE_string(dump_config, "", dump_config_message);
/// @brief Define flag for input shape
DEFINE_string(shape, "", shape_message);
+/// @brief Define flag for quantization bits (default 16)
+DEFINE_int32(qb, 16, gna_qb_message);
+
/**
* @brief This function show a help message
*/
@@ -221,4 +227,5 @@ static void showUsage() {
std::cout << " -dump_config " << dump_config_message << std::endl;
std::cout << " -load_config " << load_config_message << std::endl;
#endif
+ std::cout << " -qb " << gna_qb_message << std::endl;
}
diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 684753b8b55..87da8c5d224 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -274,6 +275,14 @@ int main(int argc, char *argv[]) {
}
} else if (device == "MYRIAD") {
device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
+ } else if (device == "GNA") {
+ if (FLAGS_qb == 8)
+ device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
+ else
+ device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";
+
+ if (isFlagSetInCommandLine("nthreads"))
+ device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
}
}
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 94079aebdf3..80254dca3c0 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -24,6 +24,7 @@
#include "details/caseless.hpp"
#include
#include
+#include
#include
#include
#include
@@ -73,7 +74,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin
std::shared_ptr clonedNetwork(nullptr);
if (network.getFunction()) {
const auto transformations_callback = [](const std::shared_ptr &node) -> bool {
- return std::dynamic_pointer_cast(node) != nullptr;
+ return std::dynamic_pointer_cast(node) ||
+ std::dynamic_pointer_cast(node);
};
CNNNetwork net(network.getFunction());
auto nGraphFunc = net.getFunction();
diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
index ae54fa67796..8a25d74d0ca 100644
--- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp
@@ -17,6 +17,8 @@
#include "blob_factory.hpp"
#include "precision_ex.hpp"
#include "layers/gna_layer_info.hpp"
+#include "weights_converter.hpp"
+#include "layer_transform.hpp"
namespace GNAPluginNS {
namespace frontend {
@@ -137,6 +139,48 @@ class Quant {
}
};
+template
+inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
+ auto prec_blob = InferenceEngine::make_shared_blob({ precision,
+ fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() });
+ prec_blob->allocate();
+
+ int i = 0;
+ for (auto& precValue : *prec_blob) {
+ auto f32Value = fp32_blob->buffer().template as::value_type*>()[i++] * scale_factor;
+ if (f32Value > std::numeric_limits::max()) {
+ precValue = std::numeric_limits::max();
+ } else if (f32Value < std::numeric_limits::min()) {
+ precValue = std::numeric_limits::min();
+ } else {
+ precValue = static_cast(f32Value);
+ }
+ }
+
+ return static_cast(prec_blob);
+}
+
+inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) {
+ InferenceEngine::Blob::Ptr result_ptr = nullptr;
+ switch (precision) {
+ case InferenceEngine::Precision::FP32:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ case InferenceEngine::Precision::I32:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ case InferenceEngine::Precision::I16:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ case InferenceEngine::Precision::I8:
+ result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor);
+ break;
+ default:
+ THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported";
+ }
+ return result_ptr;
+}
+
template
inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
InferenceEngine::WeightableLayer *wl,
@@ -389,6 +433,18 @@ class DataQuantizer : public DataQuantizerBas
}
cnnLayer->precision = Desc::mandatory().getInputPrecision();
+ if (cnnLayer->type == "Const") {
+ if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]);
+ }
+ auto const_scale_factor = InferenceEngine::getInjectedData(*cnnLayer)->_dst_quant.scale;
+ auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]);
+ auto const_blob = cnnLayer->blobs["custom"];
+ if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
+ cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor);
+ }
+ }
+
return true;
}
};
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index 9e9ebc10aaf..ba221f68ad3 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -197,6 +197,36 @@ class ScaleFactorPerLayer {
return true;
}
+ if (cnnLayer->type == "Const") {
+ auto blob = cnnLayer->blobs["custom"];
+ if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ blob = make_fp32_blob(blob);
+ }
+ auto max_val = std::numeric_limits::min();
+ auto min_val = std::numeric_limits::max();
+
+ auto flt_buf = blob->buffer().as();
+ auto size = blob->size();
+
+ for (int i=0; i < size; i++) {
+ auto val = flt_buf[i];
+ if (val > max_val) max_val = val;
+ if (val < min_val) min_val = val;
+ }
+
+ auto abs_val = std::max(std::abs(max_val), std::abs(min_val));
+ auto scale_val = static_cast(std::numeric_limits::max()) / abs_val;
+
+ // TODO: Investigate what should be the scale in such cases (31910)
+ if (std::isinf(scale_val)) {
+ quant->_dst_quant.scale = quant->_src_quant.scale;
+ } else {
+ quant->_dst_quant.scale = scale_val;
+ }
+
+ return ScaleFactorUpdateResult();
+ }
+
if (!CNNNetHasPrevLayer(cnnLayer)) {
quant->_dst_quant.scale = quant->_src_quant.scale;
return ScaleFactorUpdateResult();
@@ -231,6 +261,7 @@ class ScaleFactorPerLayer {
auto quantParams0 = InferenceEngine::getInjectedData(in0);
auto quantParams1 = InferenceEngine::getInjectedData(in1);
+
auto quantData = InferenceEngine::getInjectedData(*eltwiseLayer);
switch (eltwiseLayer->_operation) {
@@ -239,6 +270,7 @@ class ScaleFactorPerLayer {
quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale;
break;
}
+ case InferenceEngine::EltwiseLayer::Sub:
case InferenceEngine::EltwiseLayer::Sum: {
// detect which input will be used as biases
if (LayerInfo(in0).has32BOutput()) {
@@ -247,6 +279,7 @@ class ScaleFactorPerLayer {
}
// this path might result in significant data loss
+ quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale;
quantData->_dst_quant.scale = quantParams1->_dst_quant.scale;
diff --git a/inference-engine/src/gna_plugin/frontend/weights_converter.hpp b/inference-engine/src/gna_plugin/frontend/weights_converter.hpp
index 549b8ef9e98..040f7bb11f6 100644
--- a/inference-engine/src/gna_plugin/frontend/weights_converter.hpp
+++ b/inference-engine/src/gna_plugin/frontend/weights_converter.hpp
@@ -7,22 +7,28 @@
#include "quantized_layer_params.hpp"
#include "precision_utils.h"
+inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) {
+ auto fp32_blob = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32,
+ fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() });
+ fp32_blob->allocate();
+
+ int i = 0;
+ for (auto& f32Value : *fp32_blob) {
+ auto f16Value = fp16_blob->buffer().template as::value_type*>()[i++];
+ f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
+ }
+
+ return static_cast(fp32_blob);
+}
+
inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) {
InferenceEngine::BlobMap newBlobs;
for (auto& blob : lp->blobs) {
if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) {
THROW_GNA_EXCEPTION << "Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first;
}
- auto tmp =
- InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32,
- blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C });
- tmp->allocate();
- int i = 0;
- for (auto& f32Value : *tmp) {
- auto f16Value = blob.second->buffer().template as::value_type*>()[i++];
- f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value);
- }
- newBlobs[blob.first] = tmp;
+ auto fp32_blob = make_fp32_blob(blob.second);
+ newBlobs[blob.first] = fp32_blob;
}
lp->_biases = newBlobs["biases"];
lp->_weights = newBlobs["weights"];
@@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) {
for (auto& dataItem : lp->outData) {
dataItem->setPrecision(InferenceEngine::Precision::FP32);
}
+ InferenceEngine::BlobMap newBlobs;
+ for (auto& blob_pair : lp->blobs) {
+ auto blob_name = blob_pair.first;
+ auto blob_ptr = blob_pair.second;
+ if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) {
+ auto new_blob = make_fp32_blob(blob_ptr);
+ newBlobs[blob_name] = new_blob;
+ } else {
+ newBlobs[blob_name] = blob_ptr;
+ }
+ }
+
return true;
}
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 60ff272b6fb..46f41199ee9 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section";
}
- auto constBlob = constLayer->blobs["custom"];
+ auto const_blob = constLayer->blobs["custom"];
- void* ptr_for_const_blob = &ptr_for_const_blob;
- connectOutput(constLayer, ptr_for_const_blob, constBlob->size());
-
- const_connections[constLayer->name] = ptr_for_const_blob;
+ const_connections[constLayer->name] = &const_connections[constLayer->name];
+ void* ptr_for_const_blob = &const_connections[constLayer->name];
+ connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize());
// TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests
// dont see practical use case when bind storage type need to be different that allocation type
- gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) {
- ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize());
+ gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) {
+ ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize());
});
}
@@ -602,15 +601,35 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
if (cropLayer == nullptr) {
return;
}
- if (cropLayer->axis.size() > 1) {
+
+ IE_ASSERT(!layer->insData.empty());
+ auto inputs = layer->insData.begin()->lock();
+
+ IE_ASSERT(!cropLayer->axis.empty());
+ IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size());
+ IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size());
+
+ std::vector axis, dim, offset;
+ for (int n = 0; n < cropLayer->axis.size(); n++) {
+ uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
+ // Exclude crop layer components that do nothing
+ if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
+ continue;
+ }
+ axis.push_back(cropLayer->axis[n]);
+ dim.push_back(cropLayer->dim[n]);
+ offset.push_back(cropLayer->offset[n]);
+ }
+
+ if (axis.size() > 1) {
THROW_GNA_EXCEPTION <<
- "Crop layer does not support the number of cropped dimensions = "
- << cropLayer->axis.size() << ".";
+ "Crop layer does not support the number of (non-trivial) cropped dimensions more than 1, provided: "
+ << axis.size() << ".";
}
auto quantized = InferenceEngine::getInjectedData(layer);
- size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
- size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
+ size_t cropOffset = offset.front() * cropLayer->precision.size();
+ size_t cropOutputSize = dim.front() * cropLayer->precision.size();
if (ALIGN64(cropOffset) == cropOffset) {
// leave crop as it is
@@ -637,20 +656,18 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
} else {
gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n";
IE_ASSERT(!layer->outData.empty());
- IE_ASSERT(!layer->insData.empty());
auto outputs = *layer->outData.begin();
- auto inputs = layer->insData.begin()->lock();
// only 1D crops supported
- if (cropLayer->axis.size() != 1) {
+ if (axis.size() != 1) {
THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name;
}
// TODO: add unit tests for 4d crops blobs
- uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]);
+ uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
uint32_t num_columns_in = 1;
- uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]);
+ uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
void* ptr_inputs = nullptr;
@@ -686,7 +703,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
- FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
+ FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true);
(quantized == nullptr) ?
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) :
@@ -713,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
int biasesLayerIdx = 1;
if (quantized) {
- if (eltwise._operation == EltwiseLayer::Sum) {
+ switch (eltwise._operation) {
+ case InferenceEngine::EltwiseLayer::Sum:
+ case InferenceEngine::EltwiseLayer::Sub:
+ {
if (inputs4Bytes->getPrecision().size() != 4) {
std::swap(inputs4Bytes, inputs2Bytes);
biasesLayerIdx = 0;
}
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4);
- } else {
+ break;
+ }
+ case InferenceEngine::EltwiseLayer::Prod:
+ {
// for mul both inputs should be 2 bytes precision
GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2);
GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2);
+ break;
+ }
+ default:
+ THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation;
}
}
@@ -767,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
switch (eltwise._operation) {
+ case EltwiseLayer::Sub:
+ if (quantized == nullptr) {
+ gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64);
+ } else {
+ auto scaledIdentity = -quantized->_weights_quant.scale;
+
+ auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX)));
+
+ gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64);
+ }
+ connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
+ break;
case EltwiseLayer::Sum:
if (quantized == nullptr) {
gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64);
diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
index 216a2180c8e..8a7613584c8 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
@@ -248,8 +248,6 @@ void Config::AdjustKeyMapValues() {
key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name();
- key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] =
- gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] =
gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO;
key_config_map[CONFIG_KEY(PERF_COUNT)] =
diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
index 8b8f5591614..4cc135e1087 100644
--- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
+++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp
@@ -153,6 +153,15 @@ class LayerInfo {
return dynamic_cast(layer)->_operation ==
InferenceEngine::EltwiseLayer::Sum;
}
+ bool isEltwiseSub() const noexcept {
+ IS_VALID();
+ if (!isEltwise()) return false;
+ // dynamic_cast(layer) is validated in isEltwise function
+ // coverity[var_deref_op]
+ return dynamic_cast(layer)->_operation ==
+ InferenceEngine::EltwiseLayer::Sub;
+ }
+
bool isEltwiseMul() const noexcept {
IS_VALID();
if (!isEltwise()) return false;
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 0c4b9f12961..b74d67710db 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -150,6 +150,7 @@ static std::vector getCandidatesForIdentityInsertion(const CNNLayer
auto prev1 = PrevFunctionalLayer(l, 1);
switch (eltwise->_operation) {
+ case EltwiseLayer::Sub:
case EltwiseLayer::Sum:
if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) {
return prevLayers;
@@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() {
// for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights
// for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights
- if (eltwise->_operation != EltwiseLayer::Sum)
+ if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub)
continue;
auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) {
diff --git a/inference-engine/src/ir_readers/ie_ir_parser.cpp b/inference-engine/src/ir_readers/ie_ir_parser.cpp
index 27222c95d65..7f1047f4f9f 100644
--- a/inference-engine/src/ir_readers/ie_ir_parser.cpp
+++ b/inference-engine/src/ir_readers/ie_ir_parser.cpp
@@ -392,7 +392,7 @@ std::shared_ptr V10Parser::createNode(const std::vector V10Parser::LayerCreator::cre
std::vector activations_beta = getParameters(dn, "activations_beta", {});
float clip = GetFloatAttr(dn, "clip", 0.f);
return std::make_shared(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5],
- GetUIntAttr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
+ GetUInt64Attr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO,
activations, activations_alpha, activations_beta, clip);
}
@@ -1365,8 +1365,8 @@ std::shared_ptr V10Parser::LayerCreator::cre
if (dn.empty())
THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name;
- size_t offset = GetUIntAttr(dn, "offset");
- size_t size = GetUIntAttr(dn, "size");
+ size_t offset = GetUInt64Attr(dn, "offset");
+ size_t size = GetUInt64Attr(dn, "size");
if (!weights || weights->cbuffer() == nullptr)
THROW_IE_EXCEPTION << "Cannot read network! The model requires weights data! "
diff --git a/inference-engine/src/ir_readers/ie_ir_parser.hpp b/inference-engine/src/ir_readers/ie_ir_parser.hpp
index 4b2d96e1dde..17b331cb3fb 100644
--- a/inference-engine/src/ir_readers/ie_ir_parser.hpp
+++ b/inference-engine/src/ir_readers/ie_ir_parser.hpp
@@ -166,12 +166,12 @@ private:
class XmlDeserializer : public ngraph::AttributeVisitor {
public:
explicit XmlDeserializer(const pugi::xml_node& node): node(node) {}
- void on_attribute(const std::string& name, std::string& value) override {
+ void on_adapter(const std::string& name, ngraph::ValueAccessor& value) override {
std::string val;
if (!getStrAttribute(node.child("data"), name, val)) return;
- value = val;
+ value.set(val);
}
- void on_attribute(const std::string& name, bool& value) override {
+ void on_adapter(const std::string& name, ngraph::ValueAccessor& value) override {
std::string val;
if (!getStrAttribute(node.child("data"), name, val)) return;
std::transform(val.begin(), val.end(), val.begin(), [](char ch) {
@@ -184,7 +184,7 @@ private:
bool is_false = false_names.find(val) != false_names.end();
if (!is_true && !is_false) return;
- value = is_true;
+ value.set(is_true);
}
void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override {
std::string val;
diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
index d1eef3df1f0..1c6ed0f3227 100644
--- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
+++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp
@@ -63,12 +63,8 @@ public:
CNNLayerPtr create();
- void on_attribute(const std::string& name, std::string& value) override {
- params[name] = value;
- }
-
- void on_attribute(const std::string& name, bool& value) override {
- params[name] = value ? "true" : "false";
+ void on_adapter(const std::string& name, ::ngraph::ValueAccessor &value) override {
+ params[name] = value.get() ? "true" : "false";
}
void addSpecificCreator(const std::vector& forTypes, const CreatorFor& creator) {
@@ -417,6 +413,15 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr
res->params = params;
return res;
});
+
+ addSpecificCreator({"StaticShapeTopK"}, [](const std::shared_ptr<::ngraph::Node>& node,
+ const std::map params) -> CNNLayerPtr {
+ LayerParams attrs = {node->get_friendly_name(), "TopK",
+ details::convertPrecision(node->get_output_element_type(0))};
+ auto res = std::make_shared(attrs);
+ res->params = params;
+ return res;
+ });
}
CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() {
@@ -530,7 +535,6 @@ std::shared_ptr convertFunctionToICNNNetwork(const std::shared_p
std::make_shared>(),
std::make_shared>(),
std::make_shared>(),
- std::make_shared>(),
std::make_shared>(),
std::make_shared>(),
std::make_shared>(),
diff --git a/inference-engine/src/legacy_api/src/graph_transformer.cpp b/inference-engine/src/legacy_api/src/graph_transformer.cpp
index cb5afbff4e2..180c63c0eda 100644
--- a/inference-engine/src/legacy_api/src/graph_transformer.cpp
+++ b/inference-engine/src/legacy_api/src/graph_transformer.cpp
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
#include
#include "blob_factory.hpp"
@@ -228,6 +229,12 @@ std::vector ConstTransformer::foldConstSubgraphsInternal(const std:
return remainingConstLayers;
}
+static std::vector skipConstInfer = {
+ "FakeQuantize",
+ "Quantize",
+ "CumSum" // Const inference function for CumSum is not implemented!
+};
+
const std::map ConstTransformer::getConstLayers(const std::vector& sortedLayers) {
std::map mapConstLayers;
// collect all const layers, which inputs are const layers.
@@ -235,7 +242,7 @@ const std::map ConstTransformer::getConstLayers(const std::ve
// Layers with "Shape" and "Const" type are Const by definition
if (layer->type == "Shape" || layer->type == "Const") {
mapConstLayers[layer->name] = false;
- } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) {
+ } else if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end() && !isForFakeQuantzie(*layer)) {
bool isAllInputsConst = true;
for (auto const& data : layer->insData) {
auto creator = data.lock()->getCreatorLayer().lock();
@@ -336,7 +343,7 @@ const BlobMap ConstTransformer::getConstData(const std::map&
};
for (const auto& layer : sortedLayers) {
- if (layer->type == "FakeQuantize" || layer->type == "Quantize") {
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) != skipConstInfer.end()) {
continue;
}
@@ -346,13 +353,13 @@ const BlobMap ConstTransformer::getConstData(const std::map&
auto implPtr = holder.getConstInferImpl(layer->type);
if (!implPtr && !isForShape)
- if (layer->type != "FakeQuantize" && layer->type != "Quantize")
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
THROW_IE_EXCEPTION << "Failed to find reference implementation for `" + layer->name +
"` Layer with `" + layer->type + "` Type on constant propagation";
if (!isForShape) {
auto outputBlobs = getOutputBlobs(layer->outData);
auto inp = getInputBlobs(layer->insData, isForShape);
- if (layer->type != "FakeQuantize" && layer->type != "Quantize")
+ if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end())
implPtr->infer(inp, layer->params, layer->blobs, outputBlobs);
for (int i = 0; i < layer->outData.size(); i++) {
std::string dataName = layer->outData[i]->getName();
diff --git a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp
index 290dc13e452..0bf0115d1db 100644
--- a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp
+++ b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp
@@ -1381,24 +1381,6 @@ CNNLayer::Ptr NodeConverter::createLayer(const st
return res;
}
-template <>
-CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const {
- LayerParams params = {layer->get_friendly_name(), "Select", details::convertPrecision(layer->get_output_element_type(0))};
-
- auto res = std::make_shared(params);
- auto castedLayer = ngraph::as_type_ptr(layer);
- if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name;
-
- auto broadcast = castedLayer->get_auto_broadcast().m_type;
- if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) {
- res->params["auto_broadcast"] = "numpy";
- } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) {
- res->params["auto_broadcast"] = "none";
- }
-
- return res;
-}
-
template <>
CNNLayer::Ptr NodeConverter::createLayer(
const std::shared_ptr& layer) const {
diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 2bb9b37dfbd..2e7df2f0bdc 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -4,6 +4,10 @@
set(TARGET_NAME "MKLDNNPlugin")
+if(ENABLE_LTO)
+ ie_enable_lto()
+endif()
+
if (WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
endif()
@@ -41,6 +45,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_resample_node.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
@@ -93,6 +98,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp
)
foreach(LAYER ${LAYERS})
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
index 5095ea31d3a..12c9644b4cb 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
@@ -51,7 +51,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc fetch() const {
memory::primitive_desc adesc;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc = mkldnn_primitive_desc_iterator_fetch(get());
@@ -72,7 +72,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc src_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -86,7 +86,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc dst_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -101,7 +101,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc diff_src_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -115,7 +115,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc weights_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -129,7 +129,7 @@ struct primitive_desc_iterator : public handle
memory::primitive_desc diff_dst_primitive_desc(size_t index = 0) const {
memory::primitive_desc adesc;
memory::primitive_desc cdesc_elem;
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get()));
const_mkldnn_primitive_desc_t const_cdesc =
mkldnn_primitive_desc_query_pd(cdesc_elem.get(),
@@ -152,7 +152,7 @@ struct primitive_desc_iterator : public handle
template
void getPrimitiveDescriptor(T& pdesc) const {
- mkldnn_primitive_desc_t cdesc;
+ mkldnn_primitive_desc_t cdesc = nullptr;
memory::primitive_desc cdescpd;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
index 43c0b93995f..209bcc44d61 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
@@ -151,7 +151,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) {
auto inputDesc = getInputDesc();
auto outputDesc = getOutputDesc();
if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) ||
- (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc))
+ (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 &&
+ (inputDesc.getPrecision() != outputDesc.getPrecision() ||
+ inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc())))
THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats.";
if (inputDesc.getLayout() == InferenceEngine::Layout::ANY)
THROW_IE_EXCEPTION << "Cannot get input descriptor!";
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp
deleted file mode 100644
index c71790729ce..00000000000
--- a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "mkldnn_layers_dispatcher.hpp"
-#include
-#include "nodes/list.hpp"
-#include
-
-using namespace InferenceEngine;
-
-namespace MKLDNNPlugin {
-
-void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr) {
- if (!mngr)
- THROW_IE_EXCEPTION << "Cannot add default extensions! Extension manager is empty.";
-
- auto defaultExtensions = std::make_shared();
- mngr->AddExtension(defaultExtensions);
-}
-
-} // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp b/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp
deleted file mode 100644
index 4c7e5b6cb67..00000000000
--- a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "mkldnn_extension_mngr.h"
-
-namespace MKLDNNPlugin {
-
-void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr);
-
-} // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
index ad1bc551220..1ea2ecd2c0f 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
@@ -23,11 +23,14 @@ MKLDNNMemory::MKLDNNMemory(const engine& eng) : eng(eng) {}
size_t MKLDNNMemory::GetSize() const {
uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType()));
+ return GetElementsCount() * itemSize;
+}
+size_t MKLDNNMemory::GetElementsCount() const {
auto desc = GetDescriptor();
std::vector dims(desc.data.layout_desc.blocking.padding_dims,
desc.data.layout_desc.blocking.padding_dims + desc.data.ndims);
- return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()) * itemSize;
+ return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies());
}
void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory::format format, const void* data) {
@@ -182,6 +185,7 @@ bool MKLDNNMemory::isConsistant(memory::dims dims, memory::format format) {
case f::OhIw16o4i:
case f::OIhw4i16o4i:
case f::OhIw8o4i:
+ case f::IOhw16o16i:
ndims = 4; break;
// DHW
case f::ncdhw:
@@ -411,6 +415,7 @@ std::string MKLDNNMemory::formatToString(memory::format fmt) {
case memory::OhIw8o4i: return "OhIw8o4i";
case memory::OhIw16o4i: return "OhIw16o4i";
case memory::OIhw4i16o4i: return "OIhw4i16o4i";
+ case memory::IOhw16o16i: return "IOhw16o16i";
case memory::oidhw: return "oidhw";
case memory::dhwio: return "dhwio";
@@ -718,6 +723,33 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::OIhw8o8i:
+ order = {0, 1, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::OIhw16o16i:
+ order = {0, 1, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::IOhw16o16i:
+ order = {1, 0, 2, 3, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::OIdhw8i8o:
order = {0, 1, 2, 3, 4, 1, 0};
blkDims = dims;
@@ -736,8 +768,26 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::OIdhw8o8i:
+ order = {0, 1, 2, 3, 4, 1, 0};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
+ case memory::OIdhw16o16i:
+ order = {0, 1, 2, 3, 4, 0, 1};
+ blkDims = dims;
+ blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0);
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::gOIhw4o4i:
- order = {0, 1, 2, 3, 4, 2, 1};
+ order = {0, 1, 2, 3, 4, 1, 2};
blkDims = dims;
blkDims[1] = blkDims[1] / 4 + (blkDims[1] % 4 ? 1 : 0);
blkDims[2] = blkDims[2] / 4 + (blkDims[2] % 4 ? 1 : 0);
@@ -754,6 +804,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(8);
layout = Layout::BLOCKED;
break;
+ case memory::gOIhw8o8i:
+ order = {0, 1, 2, 3, 4, 1, 2};
+ blkDims = dims;
+ blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0);
+ blkDims[2] = blkDims[2] / 8 + (blkDims[2] % 8 ? 1 : 0);
+ blkDims.push_back(8);
+ blkDims.push_back(8);
+ layout = Layout::BLOCKED;
+ break;
case memory::gOIhw16i16o:
order = {0, 1, 2, 3, 4, 2, 1};
blkDims = dims;
@@ -763,6 +822,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
blkDims.push_back(16);
layout = Layout::BLOCKED;
break;
+ case memory::gOIhw16o16i:
+ order = {0, 1, 2, 3, 4, 1, 2};
+ blkDims = dims;
+ blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0);
+ blkDims[2] = blkDims[2] / 16 + (blkDims[2] % 16 ? 1 : 0);
+ blkDims.push_back(16);
+ blkDims.push_back(16);
+ layout = Layout::BLOCKED;
+ break;
case memory::OhIw8o4i:
order = {0, 2, 1, 3, 0, 1};
blkDims = dims;
@@ -1067,6 +1135,16 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
} else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
mkldnnFormat = memory::format::OIhw16i16o;
}
+ } else if (order.size() == 6 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
+ if (blkdDims[4] == 8 && blkdDims[5] == 8) {
+ mkldnnFormat = memory::format::OIhw8o8i;
+ } else if (blkdDims[4] == 16 && blkdDims[5] == 16) {
+ mkldnnFormat = memory::format::OIhw16o16i;
+ }
+ } else if (order.size() == 6 && order[0] == 1 && order[1] == 0 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) {
+ if (blkdDims[4] == 16 && blkdDims[5] == 16) {
+ mkldnnFormat = memory::format::IOhw16o16i;
+ }
} else if (order.size() == 5 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0) {
if (blkdDims[4] == 8) {
mkldnnFormat = memory::format::Ohwi8o;
@@ -1122,6 +1200,13 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
} else if (blkdDims[6] == 16) {
mkldnnFormat = memory::format::OIdhw16i16o;
}
+ } else if (order.size() == 7 &&
+ order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
+ if (blkdDims[6] == 8) {
+ mkldnnFormat = memory::format::OIdhw8o8i;
+ } else if (blkdDims[6] == 16) {
+ mkldnnFormat = memory::format::OIdhw16o16i;
+ }
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 2 && order[2] == 3 && order[3] == 1 && order[4] == 4 && order[5] == 0 && order[6] == 1) {
if (blkdDims[5] == 8) {
@@ -1136,12 +1221,21 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 2 && order[6] == 1) {
if (blkdDims[6] == 4) {
- mkldnnFormat = memory::format::gOIhw4o4i;
+ mkldnnFormat = memory::format::gOIhw4i4o;
} else if (blkdDims[6] == 8) {
mkldnnFormat = memory::format::gOIhw8i8o;
} else if (blkdDims[6] == 16) {
mkldnnFormat = memory::format::gOIhw16i16o;
}
+ } else if (order.size() == 7 &&
+ order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
+ if (blkdDims[6] == 4) {
+ mkldnnFormat = memory::format::gOIhw4o4i;
+ } else if (blkdDims[6] == 8) {
+ mkldnnFormat = memory::format::gOIhw8o8i;
+ } else if (blkdDims[6] == 16) {
+ mkldnnFormat = memory::format::gOIhw16o16i;
+ }
} else if (order.size() == 7 &&
order[0] == 0 && order[1] == 1 && order[2] == 3 && order[3] == 2 && order[4] == 4 && order[5] == 1 && order[6] == 2) {
if (blkdDims[5] == 8 && blkdDims[6] == 4) {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
index 63b46c9e531..4b0d024d223 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
@@ -87,6 +87,7 @@ public:
}
size_t GetSize() const;
+ size_t GetElementsCount() const;
mkldnn::memory::format GetFormat() const {
return static_cast(prim->get_primitive_desc().desc().data.format);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
index e40475fa40d..e5afa640662 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@@ -1017,12 +1017,17 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() {
auto config = selected_pd->getConfig();
if (!isInitConfig(config)) {
for (size_t i = 0; i < config.inConfs.size(); i++) {
- config.inConfs[i].desc = getConfiguredInputDesc(config, i);
+ // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes the offset field.
+ // That's why the MKLDNNMemoryDesc routine is used to reinitialize the TD with the expected offset values.
+ config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i));
}
for (size_t i = 0; i < config.outConfs.size(); i++) {
- config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
+ // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes the offset field.
+ // That's why the MKLDNNMemoryDesc routine is used to reinitialize the TD with the expected offset values.
+ config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i));
}
+
initDescriptor(config);
} else if (getType() != RNNSeq && getType() != RNNCell) {
initDescriptor(config);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index 46c3d26fe02..02c5083863c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -370,7 +370,10 @@ public:
if (srcDescs.empty() || selectedDescs.empty())
return false;
for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) {
- if (srcDescs[i] != selectedDescs[i].desc && srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
+ if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() &&
+ srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() &&
+ srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) &&
+ srcDescs[i].getLayout() != InferenceEngine::Layout::ANY)
return false;
}
return true;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index f626c75f0bf..95c7a38a59c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -5,7 +5,6 @@
#include "ie_metric_helpers.hpp"
#include "mkldnn_plugin.h"
#include "mkldnn_extension_mngr.h"
-#include "mkldnn_layers_dispatcher.hpp"
#include "mkldnn_weights_cache.hpp"
#include
#include
@@ -15,6 +14,7 @@
#include
#include
#include
+#include
#include "convert_function_to_cnn_network.hpp"
#include
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#include
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
@@ -40,7 +41,7 @@ using namespace InferenceEngine;
Engine::Engine() {
_pluginName = "CPU";
- addDefaultExtensions(extensionManager);
+ extensionManager->AddExtension(std::make_shared());
}
Engine::~Engine() {
@@ -83,7 +84,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st
const auto transformations_callback = [](const std::shared_ptr &node) -> bool {
return std::dynamic_pointer_cast(node) ||
std::dynamic_pointer_cast(node) ||
- std::dynamic_pointer_cast(node);
+ std::dynamic_pointer_cast(node) ||
+ std::dynamic_pointer_cast(node);
};
auto nGraphFunc = clonedNetwork->getFunction();
// Disable shape inference (WA for generic operations)
diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp
index c7134eba5e0..449168f504c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include "argmax_imp.hpp"
@@ -49,7 +48,7 @@ private:
argmax_conf conf;
};
-REG_FACTORY_FOR(ImplFactory, ArgMax);
+REG_FACTORY_FOR(ArgMaxImpl, ArgMax);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp
index 4fabb6010d1..b90851387d6 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp
@@ -4,6 +4,7 @@
#include "argmax_imp.hpp"
+#include
#include
#include
#include
@@ -181,7 +182,7 @@ void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape
vmask_type vmask;
int s_index = i0 * dim * after_num + ib1 * block_size;
- memset(reinterpret_cast(&vmax_values[0]), 0, sizeof(vmax_values));
+ std::memset(reinterpret_cast(&vmax_values[0]), 0, sizeof(vmax_values));
auto vswap_func = [&](int index1, int index2) {
vtmp = vmax_values[index1];
diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp
index 2f9014ad081..db0268846f8 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/base.hpp
@@ -6,7 +6,7 @@
#include
#include "ie_util_internal.hpp"
-#include "list.hpp"
+#include "nodes/list.hpp"
#include
#include
@@ -176,6 +176,21 @@ protected:
IE_SUPPRESS_DEPRECATED_END
+template
+inline void extRegister(MKLDNNExtensions * extInstance, const char * __type) {
+ IE_SUPPRESS_DEPRECATED_START
+ extInstance->AddExt(__type,
+ [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
+ return new __prim(layer);
+ });
+ IE_SUPPRESS_DEPRECATED_END
+}
+
+#define REG_FACTORY_FOR(__prim, __type) \
+ void __prim ## __type(MKLDNNExtensions * extInstance) { \
+ extRegister>(extInstance, #__type); \
+ }
+
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
diff --git a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp
index 2d0d3fc2948..60e15726fc9 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -219,7 +218,7 @@ private:
std::vector _crops_end;
};
-REG_FACTORY_FOR(ImplFactory, BatchToSpace);
+REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
index cdaedb24d64..2e784c4b85c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -129,7 +128,7 @@ private:
const size_t BROADCAST_SHAPE = 1;
};
-REG_FACTORY_FOR(ImplFactory, Broadcast);
+REG_FACTORY_FOR(BroadcastImpl, Broadcast);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
index 278cd53b68b..bae370b59f0 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -123,7 +122,7 @@ private:
bool with_bins = false;
};
-REG_FACTORY_FOR(ImplFactory, Bucketize);
+REG_FACTORY_FOR(BucketizeImpl, Bucketize);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h b/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h
index 7f460dd9faa..bd55bb86294 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h
@@ -14,8 +14,16 @@ namespace Cpu {
#if defined(HAVE_AVX512F)
namespace AVX512F {
+ static inline __m512 _mm_uni_any_ps() {
+ return __m512{};
+ }
+
+ static inline __m512i _mm_uni_any_epi32() {
+ return __m512i{};
+ }
+
static inline __m512 _mm_uni_loadu_ps(const float* psrc) {
- return _mm512_loadu_ps(psrc);
+ return _mm512_mask_loadu_ps(_mm_uni_any_ps(), (__mmask16)-1, psrc);
}
static inline void _mm_uni_storeu_ps(float* pdst, const __m512& vec) {
@@ -62,8 +70,12 @@ namespace AVX512F {
return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1)));
}
+ static inline __m512i _mm_uni_set1_epi32(int value) {
+ return _mm512_mask_set1_epi32(_mm_uni_any_epi32(), (__mmask16)-1, value);
+ }
+
static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) {
- return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1);
+ return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm_uni_set1_epi32(0)), vec0, vec1);
}
static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) {
@@ -90,10 +102,6 @@ namespace AVX512F {
return _mm512_add_epi32(vec0, vec1);
}
- static inline __m512i _mm_uni_set1_epi32(int value) {
- return _mm512_set1_epi32(value);
- }
-
static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) {
return _mm512_sll_epi32(vec, _mm_set1_epi64x(value));
}
@@ -119,7 +127,7 @@ namespace AVX512F {
}
static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) {
- return _mm512_cvtepi32_ps(vec);
+ return _mm512_mask_cvtepi32_ps(_mm_uni_any_ps(), (__mmask16)-1, vec);
}
} // namespace AVX512F
#elif defined(HAVE_AVX2)
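
Note on the uni_simd.h change above: the plain AVX-512 intrinsics (_mm512_loadu_ps, _mm512_set1_epi32, _mm512_cvtepi32_ps) are swapped for their masked variants with an all-ones mask and a value-initialized pass-through vector, which leaves the results unchanged. A minimal standalone check of that equivalence (illustrative only; assumes an AVX-512F-capable CPU and a -mavx512f build, and uses _mm512_setzero_ps() where the header uses _mm_uni_any_ps()):

#include <immintrin.h>
#include <cstdio>

int main() {
    alignas(64) float src[16];
    for (int i = 0; i < 16; ++i) src[i] = static_cast<float>(i);

    // Plain unmasked load, as in the old code path.
    __m512 plain  = _mm512_loadu_ps(src);
    // Masked load: (__mmask16)-1 == 0xFFFF selects all 16 lanes, so the
    // pass-through source operand (a zero vector here) is never used.
    __m512 masked = _mm512_mask_loadu_ps(_mm512_setzero_ps(), (__mmask16)-1, src);

    alignas(64) float a[16], b[16];
    _mm512_storeu_ps(a, plain);
    _mm512_storeu_ps(b, masked);
    for (int i = 0; i < 16; ++i)
        if (a[i] != b[i]) { std::puts("mismatch"); return 1; }
    std::puts("identical");
    return 0;
}
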
diff --git a/inference-engine/src/mkldnn_plugin/nodes/convert.cpp b/inference-engine/src/mkldnn_plugin/nodes/convert.cpp
index f171a142828..5e4bd96fb46 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/convert.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/convert.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -129,7 +128,7 @@ private:
std::string precision;
};
-REG_FACTORY_FOR(ImplFactory<ConvertImpl>, Convert);
+REG_FACTORY_FOR(ConvertImpl, Convert);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp
index 372b237c7c4..717af9f9e3f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -86,7 +85,7 @@ public:
}
};
-REG_FACTORY_FOR(ImplFactory<CTCGreedyDecoderImpl>, CTCGreedyDecoder);
+REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
new file mode 100644
index 00000000000..03a4f2dbc6f
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp
@@ -0,0 +1,230 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "list.hpp"
+#include "base.hpp"
+
+#include
+#include
+#include "ie_parallel.hpp"
+#include "ie_precision.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class CumSumImpl: public ExtLayerBase {
+ enum { CUM_SUM_DATA, AXIS, numOfInputs };
+ enum { N, C, D, H, W, numOfDims };
+ bool exclusive;
+ bool reverse;
+ size_t axis = 0;
+ std::vector<size_t> shape5d;
+
+public:
+ explicit CumSumImpl(const CNNLayer* layer) {
+ try {
+ layerName = layer->name;
+ if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!";
+
+ const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc();
+ const auto &dataShape = dataTensor.getDims();
+ if (dataShape.size() < 1 || dataShape.size() > 5) {
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size();
+ }
+
+ exclusive = layer->GetParamAsBool("exclusive", false);
+ reverse = layer->GetParamAsBool("reverse", false);
+
+ const auto& dataPrecision = dataTensor.getPrecision();
+ if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 &&
+ dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name();
+
+ if (layer->insData.size() == numOfInputs) {
+ const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc();
+ const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision();
+ if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name();
+
+ const auto axisTensorRank = axisTensor.getDims().size();
+ if (axisTensorRank != 0)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank;
+ }
+
+ if (dataShape != layer->outData[0]->getTensorDesc().getDims())
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions";
+
+ shape5d = get5dShape(dataShape);
+
+ LayerConfig config;
+ for (size_t i = 0; i < layer->insData.size(); i++) {
+ DataConfig inConfig;
+ inConfig.inPlace = -1;
+ inConfig.constant = false;
+
+ Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision();
+ if (inPrecision == Precision::BF16)
+ inPrecision = Precision::FP32;
+ const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims();
+ inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims));
+
+ config.inConfs.push_back(inConfig);
+ }
+ DataConfig outConfig;
+ outConfig.inPlace = -1;
+ outConfig.constant = false;
+ Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision();
+ if (outPrecision == Precision::BF16)
+ outPrecision = Precision::FP32;
+ const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims();
+ outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims));
+
+ config.outConfs.push_back(outConfig);
+
+ config.dynBatchSupport = false;
+ confs.push_back(config);
+ } catch (InferenceEngine::details::InferenceEngineException &ex) {
+ errorMsg = ex.what();
+ }
+ }
+
+ StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+ if (inputs.size() == numOfInputs)
+ axis = getAxis(inputs[AXIS], inputs[CUM_SUM_DATA]);
+
+ const auto &dataPrecision = inputs[CUM_SUM_DATA]->getTensorDesc().getPrecision();
+ switch (dataPrecision) {
+ case Precision::I8 : { execImpl<int8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::U8 : { execImpl<uint8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::I16 : { execImpl<int16_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::I32 : { execImpl<int32_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::FP32 : { execImpl<float>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::I64 : { execImpl<int64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ case Precision::U64 : { execImpl<uint64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; }
+ default : {
+ if (resp) {
+ std::string errorMsg = "CumSum layer with name '" + layerName + "' has unsupported 'data' input precision: " + dataPrecision.name();
+ errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+ }
+ return GENERAL_ERROR;
+ }
+ }
+ return OK;
+ }
+
+private:
+ template <typename dataType>
+ void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) {
+ const auto *input = _input->cbuffer().as<const dataType *>() + _input->getTensorDesc().getBlockingDesc().getOffsetPadding();
+ auto *output = _output->buffer().as<dataType *>() + _output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+ const size_t offset = _input->getTensorDesc().getBlockingDesc().getStrides()[axis];
+
+ if (reverse) {
+ if (exclusive) {
+ cumSum<true, true, dataType>(input, output, offset);
+ } else {
+ cumSum<true, false, dataType>(input, output, offset);
+ }
+ } else {
+ if (exclusive) {
+ cumSum<false, true, dataType>(input, output, offset);
+ } else {
+ cumSum<false, false, dataType>(input, output, offset);
+ }
+ }
+ }
+
+ template <bool reverse, bool exclusive, typename dataType>
+ void cumSum(const dataType *input, dataType *output, const size_t &offset) {
+ std::vector<size_t> iterationRange(numOfDims - 1);
+ size_t j = 0;
+ for (size_t i = 0; i < shape5d.size(); i++) {
+ if (i == axis)
+ continue;
+ iterationRange[j++] = shape5d[i];
+ }
+ parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) {
+ std::vector<size_t> forStartOffset;
+ forStartOffset.push_back(ir0); forStartOffset.push_back(ir1); forStartOffset.push_back(ir2); forStartOffset.push_back(ir3);
+ forStartOffset.insert(forStartOffset.begin() + axis, 0);
+ size_t startOffset = getStartOffset(forStartOffset);
+
+ const dataType *inputStart = input + startOffset;
+ dataType *outputStart = output + startOffset;
+
+ if (reverse) {
+ if (exclusive) {
+ outputStart[offset*(shape5d[axis] - 1)] = 0;
+ for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
+ outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset];
+ }
+ } else {
+ outputStart[offset*(shape5d[axis] - 1)] = inputStart[offset*(shape5d[axis] - 1)];
+ for (int64_t i = shape5d[axis] - 2; i >= 0; i--) {
+ outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset];
+ }
+ }
+ } else {
+ if (exclusive) {
+ outputStart[0] = 0;
+ for (size_t i = 1; i < shape5d[axis]; i++) {
+ outputStart[i*offset] = inputStart[(i-1)*offset] + outputStart[(i-1)*offset];
+ }
+ } else {
+ outputStart[0] = inputStart[0];
+ for (size_t i = 1; i < shape5d[axis]; i++) {
+ outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset];
+ }
+ }
+ }
+ });
+ }
+
+ size_t getStartOffset(std::vector<size_t> &forStartOffset) {
+ return forStartOffset[N]*shape5d[C]*shape5d[D]*shape5d[H]*shape5d[W] + forStartOffset[C]*shape5d[D]*shape5d[H]*shape5d[W] +
+ forStartOffset[D]*shape5d[H]*shape5d[W] + forStartOffset[H]*shape5d[W] + forStartOffset[W];
+ }
+
+ size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) {
+ const auto& axisPrecision = _axis->getTensorDesc().getPrecision();
+ const int64_t dataShapeSize = static_cast<int64_t>(_data->getTensorDesc().getDims().size());
+ int64_t axisValueFromBlob;
+ switch (axisPrecision) {
+ case Precision::I32 : {
+ const auto *axisPtr = _axis->cbuffer().as<const int32_t *>();
+ axisValueFromBlob = static_cast<int64_t>(axisPtr[0]);
+ break;
+ }
+ case Precision::I64 : {
+ const auto *axisPtr = _axis->cbuffer().as<const int64_t *>();
+ axisValueFromBlob = axisPtr[0];
+ break;
+ }
+ default : {
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input with precision: " << axisPrecision.name();
+ }
+ }
+ if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1)
+ THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has axis with a value out of range: " << axisValueFromBlob;
+ return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize);
+ }
+
+ std::vector<size_t> get5dShape(const SizeVector& dims) {
+ std::vector<size_t> shape5d(numOfDims, 1);
+ for (size_t i = 0; i < dims.size(); i++)
+ shape5d[i] = dims[i];
+ return shape5d;
+ }
+
+private:
+ std::string layerName;
+};
+
+REG_FACTORY_FOR(CumSumImpl, CumSum);
+
+} // namespace Cpu
+} // namespace Extensions
+} // namespace InferenceEngine
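
For reference, the four CumSum modes implemented by the kernel above (inclusive/exclusive, forward/reverse along the chosen axis) reduce to the following scalar recurrence on a 1-D slice. This is an illustrative reference only, not the plugin code:

#include <cstddef>
#include <cstdio>
#include <vector>

// Scalar reference for the four CumSum modes. For data = {1, 2, 3, 4}:
//   inclusive, forward : 1 3 6 10
//   exclusive, forward : 0 1 3 6
//   inclusive, reverse : 10 9 7 4
//   exclusive, reverse : 9 7 4 0
// which matches the four branches of cumSum() above.
std::vector<float> cumSumRef(const std::vector<float>& in, bool exclusive, bool reverse) {
    const size_t n = in.size();
    std::vector<float> out(n, 0.0f);
    float acc = 0.0f;
    for (size_t k = 0; k < n; ++k) {
        const size_t i = reverse ? n - 1 - k : k;        // walk from the far end when reversed
        if (exclusive) { out[i] = acc; acc += in[i]; }   // sum of elements seen so far, excluding in[i]
        else           { acc += in[i]; out[i] = acc; }   // sum including in[i]
    }
    return out;
}

int main() {
    const std::vector<float> data{1, 2, 3, 4};
    for (bool reverse : {false, true})
        for (bool exclusive : {false, true}) {
            auto r = cumSumRef(data, exclusive, reverse);
            std::printf("exclusive=%d reverse=%d:", exclusive, reverse);
            for (float v : r) std::printf(" %g", v);
            std::printf("\n");
        }
    return 0;
}
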
diff --git a/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
index f3b208b0994..69d9024029f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -118,7 +117,7 @@ private:
size_t ownStrides[CNTR_SIZE];
};
-REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
+REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
index f80b48e29af..e5a7c09956a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -604,7 +603,7 @@ void DetectionOutputImpl::nms_mx(const float* conf_data,
}
}
-REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput);
+REG_FACTORY_FOR(DetectionOutputImpl, DetectionOutput);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
index ca8c8876343..c1f75770669 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -392,7 +391,7 @@ private:
-REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
+REG_FACTORY_FOR(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp
index c8d347244ed..e08897184a1 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -121,7 +120,7 @@ private:
const size_t FILL_VALUE = 1;
};
-REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
+REG_FACTORY_FOR(FillImpl, Fill);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
index e624bf03651..cd7e0378f07 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -148,7 +147,7 @@ private:
};
-REG_FACTORY_FOR(ImplFactory<GatherImpl>, Gather);
+REG_FACTORY_FOR(GatherImpl, Gather);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp
index 5bb17c8a0bc..5e420b22ddd 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -150,7 +149,7 @@ private:
InferenceEngine::Precision precision;
};
-REG_FACTORY_FOR(ImplFactory<GatherTreeImpl>, GatherTree);
+REG_FACTORY_FOR(GatherTreeImpl, GatherTree);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp
index 46e647d7206..b5e4e214965 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -58,7 +57,7 @@ private:
float bias = 1.0f;
};
-REG_FACTORY_FOR(ImplFactory<GRNImpl>, GRN);
+REG_FACTORY_FOR(GRNImpl, GRN);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/interp.cpp b/inference-engine/src/mkldnn_plugin/nodes/interp.cpp
index 02138570a1b..873575b8be4 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/interp.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/interp.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
#include
@@ -434,7 +433,7 @@ private:
}
};
-REG_FACTORY_FOR(ImplFactory<InterpImpl>, Interp);
+REG_FACTORY_FOR(InterpImpl, Interp);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.cpp b/inference-engine/src/mkldnn_plugin/nodes/list.cpp
new file mode 100644
index 00000000000..e017bae6c38
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/list.cpp
@@ -0,0 +1,29 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "nodes/list.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+#define FACTORY_DECLARATION(__prim, __type) \
+ void __prim ## __type(MKLDNNExtensions * extInstance)
+
+#define FACTORY_CALL(__prim, __type) \
+ __prim ## __type(this)
+
+#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_DECLARATION(__prim, __type)
+# include "list_tbl.hpp"
+#undef MKLDNN_EXTENSION_NODE
+
+MKLDNNExtensions::MKLDNNExtensions() {
+ #define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_CALL(__prim, __type)
+ # include "list_tbl.hpp"
+ #undef MKLDNN_EXTENSION_NODE
+}
+
+} // namespace Cpu
+} // namespace Extensions
+} // namespace InferenceEngine
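
The file above switches extension registration from per-translation-unit static registrar objects to an explicit table: list_tbl.hpp is included twice with different definitions of MKLDNN_EXTENSION_NODE, first to declare one registration function per (implementation, layer type) pair and then, inside the MKLDNNExtensions constructor, to call each of them; REG_FACTORY_FOR in the node .cpp files supplies the function bodies. A stripped-down sketch of the same two-pass macro pattern (hypothetical Registry/NODE_TABLE names, with the table inlined instead of kept in a separate header):

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Minimal sketch of the "expand the table twice" registration pattern used by
// list.cpp / list_tbl.hpp; Registry, NODE_TABLE and the node names are
// illustrative stand-ins, not the real plugin types.
struct Registry {
    std::map<std::string, std::function<void()>> factories;
    Registry();  // fills the map by calling the per-node registrars
    void add(const std::string& type, std::function<void()> f) { factories[type] = f; }
};

// The table: one entry per node, analogous to the MKLDNN_EXTENSION_NODE(...) lines.
#define NODE_TABLE(X) \
    X(ConvertImpl, Convert) \
    X(CumSumImpl, CumSum)

// Pass 1: define one registrar function per entry (REG_FACTORY_FOR's job).
#define DECLARE_REGISTRAR(impl, type) \
    void impl##type(Registry* r) { r->add(#type, [] { std::cout << #impl "\n"; }); }
NODE_TABLE(DECLARE_REGISTRAR)
#undef DECLARE_REGISTRAR

// Pass 2: the constructor calls every registrar, so registration no longer
// relies on static initializers scattered across translation units.
#define CALL_REGISTRAR(impl, type) impl##type(this);
Registry::Registry() { NODE_TABLE(CALL_REGISTRAR) }
#undef CALL_REGISTRAR

int main() {
    Registry r;
    for (auto& kv : r.factories) { std::cout << kv.first << ": "; kv.second(); }
    return 0;
}
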
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.hpp b/inference-engine/src/mkldnn_plugin/nodes/list.hpp
index 63b66b4ebd7..92ae6d80c69 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/list.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/list.hpp
@@ -12,17 +12,6 @@
#include
#include
-// WA for xbyak.h
-#ifdef _WIN32
-# ifndef _WINSOCKAPI_
-# define _WINSOCKAPI_
-# endif
-# ifndef _WINSOCK2API_
-# define _WINSOCK2API_
-# endif
-#endif
-#include
-
namespace InferenceEngine {
namespace Extensions {
namespace Cpu {
@@ -37,14 +26,16 @@ struct ExtensionsHolder {
class MKLDNNExtensions : public IExtension {
public:
+ MKLDNNExtensions();
+
StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override {
- collectTypes(types, size, MKLDNNExtensions::GetExtensionsHolder()->list);
+ collectTypes(types, size, extensionsHolder->list);
return OK;
}
StatusCode
getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept override {
- auto& factories = MKLDNNExtensions::GetExtensionsHolder()->list;
+ auto& factories = extensionsHolder->list;
if (factories.find(cnnLayer->type) == factories.end()) {
std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!";
errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
@@ -78,21 +69,13 @@ public:
delete this;
}
- static void AddExt(std::string name, ext_factory factory) {
- auto extensionsHolder = GetExtensionsHolder();
- if (extensionsHolder != nullptr)
- extensionsHolder->list[name] = factory;
- }
-
- static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() {
- static std::shared_ptr<ExtensionsHolder> localHolder;
- if (localHolder == nullptr) {
- localHolder = std::make_shared<ExtensionsHolder>();
- }
- return localHolder;
+ void AddExt(std::string name, ext_factory factory) {
+ extensionsHolder->list[name] = factory;
}
private:
+ std::shared_ptr<ExtensionsHolder> extensionsHolder = std::make_shared<ExtensionsHolder>();
+
template<class T>
void collectTypes(char**& types, unsigned int& size, const std::map<std::string, T> &factories) {
types = new char *[factories.size()];
@@ -108,22 +91,6 @@ private:
IE_SUPPRESS_DEPRECATED_END
-template<typename Ext>
-class ExtRegisterBase {
-public:
- explicit ExtRegisterBase(const std::string& type) {
- IE_SUPPRESS_DEPRECATED_START
- MKLDNNExtensions::AddExt(type,
- [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* {
- return new Ext(layer);
- });
- IE_SUPPRESS_DEPRECATED_END
- }
-};
-
-#define REG_FACTORY_FOR(__prim, __type) \
-static ExtRegisterBase<__prim> __reg__##__type(#__type)
-
} // namespace Cpu
} // namespace Extensions
} // namespace InferenceEngine
diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
new file mode 100644
index 00000000000..e0ebf3ff8cd
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp
@@ -0,0 +1,93 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef MKLDNN_EXTENSION_NODE
+# warning "MKLDNN_EXTENSION_NODE is not defined"
+# define MKLDNN_EXTENSION_NODE(__prim, __type)
+#endif
+
+MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox);
+MKLDNN_EXTENSION_NODE(MathImpl, Abs);
+MKLDNN_EXTENSION_NODE(MathImpl, Acos);
+MKLDNN_EXTENSION_NODE(MathImpl, Acosh);
+MKLDNN_EXTENSION_NODE(MathImpl, Asin);
+MKLDNN_EXTENSION_NODE(MathImpl, Asinh);
+MKLDNN_EXTENSION_NODE(MathImpl, Atan);
+MKLDNN_EXTENSION_NODE(MathImpl, Atanh);
+MKLDNN_EXTENSION_NODE(MathImpl, Ceil);
+MKLDNN_EXTENSION_NODE(MathImpl, Cos);
+MKLDNN_EXTENSION_NODE(MathImpl, Cosh);
+MKLDNN_EXTENSION_NODE(MathImpl, Erf);
+MKLDNN_EXTENSION_NODE(MathImpl, Floor);
+MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid);
+MKLDNN_EXTENSION_NODE(MathImpl, Log);
+MKLDNN_EXTENSION_NODE(MathImpl, Neg);
+MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal);
+MKLDNN_EXTENSION_NODE(MathImpl, Selu);
+MKLDNN_EXTENSION_NODE(MathImpl, Sign);
+MKLDNN_EXTENSION_NODE(MathImpl, Sin);
+MKLDNN_EXTENSION_NODE(MathImpl, Sinh);
+MKLDNN_EXTENSION_NODE(MathImpl, Softplus);
+MKLDNN_EXTENSION_NODE(MathImpl, Softsign);
+MKLDNN_EXTENSION_NODE(MathImpl, Tan);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs);
+MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence);
+MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput);
+MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax);
+MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze);
+MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput);
+MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo);
+MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax);
+MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo);
+MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze);
+MKLDNN_EXTENSION_NODE(ConvertImpl, Convert);
+MKLDNN_EXTENSION_NODE(FillImpl, Fill);
+MKLDNN_EXTENSION_NODE(UniqueImpl, Unique);
+MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling);
+MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace);
+MKLDNN_EXTENSION_NODE(ScatterImpl, ScatterUpdate);
+MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot);
+MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast);
+MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum);
+MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor);
+MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage);
+MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression);
+MKLDNN_EXTENSION_NODE(TopKImpl, TopK);
+MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels);
+MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth);
+MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile);
+MKLDNN_EXTENSION_NODE(InterpImpl, Interp);
+MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace);
+MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator);
+MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS);
+MKLDNN_EXTENSION_NODE(PadImpl, Pad);
+MKLDNN_EXTENSION_NODE(GRNImpl, GRN);
+MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows);
+MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize);
+MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder);
+MKLDNN_EXTENSION_NODE(GatherImpl, Gather);
+MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal);
+MKLDNN_EXTENSION_NODE(RangeImpl, Range);
+MKLDNN_EXTENSION_NODE(SelectImpl, Select);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceAnd);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL1);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL2);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSum);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSumExp);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMax);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMean);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMin);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceOr);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceProd);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSum);
+MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSumSquare);
+MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree);
+MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered);
+MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN);
+MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum);
+MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
index cea0169c56f..b79109d946b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -105,7 +104,7 @@ private:
bool is_last_dim = false;
};
-REG_FACTORY_FOR(ImplFactory<LogSoftmaxImpl>, LogSoftmax);
+REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/math.cpp b/inference-engine/src/mkldnn_plugin/nodes/math.cpp
index 92f2059618e..2920badc7f2 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/math.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/math.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
-#include "list.hpp"
#include "base.hpp"
#include
@@ -272,29 +271,29 @@ private:
float gamma = 0.0f;
};
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Abs);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Acos);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Acosh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Asin);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Asinh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Atan);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Atanh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Ceil);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Cos);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Cosh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Erf);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Floor);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, HardSigmoid);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Log);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Neg);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Reciprocal);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Selu);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sign);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sin);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Sinh);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Softplus);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Softsign);
-REG_FACTORY_FOR(ImplFactory<MathImpl>, Tan);
+REG_FACTORY_FOR(MathImpl, Abs);
+REG_FACTORY_FOR(MathImpl, Acos);
+REG_FACTORY_FOR(MathImpl, Acosh);
+REG_FACTORY_FOR(MathImpl, Asin);
+REG_FACTORY_FOR(MathImpl, Asinh);
+REG_FACTORY_FOR(MathImpl, Atan);
+REG_FACTORY_FOR(MathImpl, Atanh);
+REG_FACTORY_FOR(MathImpl, Ceil);
+REG_FACTORY_FOR(MathImpl, Cos);
+REG_FACTORY_FOR(MathImpl, Cosh);
+REG_FACTORY_FOR(MathImpl, Erf);
+REG_FACTORY_FOR(MathImpl, Floor);
+REG_FACTORY_FOR(MathImpl, HardSigmoid);
+REG_FACTORY_FOR(MathImpl, Log);
+REG_FACTORY_FOR(MathImpl, Neg);
+REG_FACTORY_FOR(MathImpl, Reciprocal);
+REG_FACTORY_FOR(MathImpl, Selu);
+REG_FACTORY_FOR(MathImpl, Sign);
+REG_FACTORY_FOR(MathImpl, Sin);
+REG_FACTORY_FOR(MathImpl, Sinh);
+REG_FACTORY_FOR(MathImpl, Softplus);
+REG_FACTORY_FOR(MathImpl, Softsign);
+REG_FACTORY_FOR(MathImpl, Tan);
} // namespace Cpu
} // namespace Extensions
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
index 66303d74405..28b25e25751 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
@@ -108,7 +108,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc aprimitive_desc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),
@@ -128,7 +128,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::pri
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc aprimitive_desc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
mkldnn_batch_normalization_desc_t *p;
error::wrap_c_api(mkldnn_primitive_desc_query(
primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p),
@@ -148,7 +148,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primiti
MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const memory::primitive_desc &primitive_desc) const {
memory::primitive_desc adesc;
- mkldnn_primitive_desc_t bndesc;
+ mkldnn_primitive_desc_t bndesc = nullptr;
const_mkldnn_primitive_desc_t const_bndesc =
mkldnn_primitive_desc_query_pd(primitive_desc.get(),
mkldnn::convert_to_c(weights_pd), 0);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
index 9213a746438..e5f3b84f71e 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
@@ -17,6 +17,16 @@
#include
#include
#include
+
+// WA for xbyak.h
+#ifdef _WIN32
+# ifndef _WINSOCKAPI_
+# define _WINSOCKAPI_
+# endif
+# ifndef _WINSOCK2API_
+# define _WINSOCK2API_
+# endif
+#endif
#include "cpu_isa_traits.hpp"
using namespace mkldnn;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
index c9e93ccc86e..e30e8842134 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
@@ -279,8 +279,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc);
createDescriptor({in_candidate}, {out_candidate});
} else {
- inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
- outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32;
+ inputDataType = (convLayer->input()->getPrecision() == Precision::BF16
+ && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
+ outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16
+ && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32;
eltwisePrecision = Precision::FP32;
for (int i = 0; i < fusedWith.size(); i++) {
auto *eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(fusedWith[i].get());
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
index 8591bebfbfb..a7c3fdc3046 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
@@ -37,7 +37,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
- if (getParentEdges().size() != 1)
+ if (getParentEdges().empty() || getParentEdges().size() > 3)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
@@ -45,7 +45,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
auto * deconvLayer = dynamic_cast<DeconvolutionLayer *>(getCnnLayer().get());
if (deconvLayer == nullptr)
THROW_IE_EXCEPTION << "Cannot convert deconvolution layer.";
- if (deconvLayer->_weights == nullptr) {
+ if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) {
THROW_IE_EXCEPTION << "Weights are empty for layer: " << deconvLayer->name
<< " used in MKLDNN node: " << getName() << "\n"
<< "Use the second argumemt of InferenceEngine::Core::ReadNetwork"
@@ -54,11 +54,22 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
withGroups = (deconvLayer->_group > 1);
isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth &&
deconvLayer->_group == deconvLayer->input()->getDims()[1];
- withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0);
+
+ bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3;
if (withBiases) {
- biases = deconvLayer->_biases;
+ Blob::Ptr biases;
+
+ if (getParentEdges().size() == 3) {
+ auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer();
+ if (biasLayer->type != "Const")
+ THROW_IE_EXCEPTION << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases";
+ biases = biasLayer->blobs["custom"];
+ } else {
+ biases = deconvLayer->_biases;
+ }
+
// WA: we add bias as depthwise post op
- setBiasAsPostOp();
+ setBiasAsPostOp(biases);
}
/* Original layout format for deconv weights is iohw (from Caffe).
@@ -83,7 +94,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]);
}
- internalBlobs.push_back(createInternalBlob(weightDims, true));
+ if (getParentEdges().size() == 1)
+ internalBlobs.push_back(createInternalBlob(weightDims, true));
invertVectorCopyUtoI(deconvLayer->_stride, stride);
for (int i = 1; i <= deconvLayer->_dilation.size(); i++) {
@@ -113,7 +125,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
}
}
-void MKLDNNDeconvolutionNode::setBiasAsPostOp() {
+void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) {
mkldnn::post_ops ops;
MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biases->size(), 16))});
@@ -157,7 +169,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() {
prim.reset(new convolution_backward_data(prim_desc,
getParentEdgeAt(0)->getMemory().GetPrimitive(),
- internalBlobMemory[0]->GetPrimitive(),
+ getWeights(),
getChildEdgeAt(0)->getMemory().GetPrimitive()));
}
@@ -197,15 +209,32 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
- InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
- if (desc.getLayout() == InferenceEngine::Layout::ANY)
+ InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(idx - 1).desc())
+ : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc());
+
+ if (desc.getLayout() == InferenceEngine::Layout::ANY) {
return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
getParentEdgeAt(idx)->getDims().ToSizeVector(),
desc.getLayout()));
- else
- return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
- getParentEdgeAt(idx)->getDims().ToSizeVector(),
- desc.getBlockingDesc()));
+ } else {
+ if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(),
+ desc.getBlockingDesc().getOrder().end()) + 1) {
+ auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector();
+ auto new_dims = weightsDims.ToSizeVector();
+
+ auto td = InferenceEngine::TensorDesc(desc.getPrecision(),
+ new_dims,
+ desc.getBlockingDesc());
+ if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) {
+ td.setLayout(BLOCKED);
+ }
+ return MKLDNNMemoryDesc(td);
+ } else {
+ return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(),
+ getParentEdgeAt(idx)->getDims().ToSizeVector(),
+ desc.getBlockingDesc()));
+ }
+ }
}
MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
@@ -219,4 +248,9 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_i
getChildEdgeAt(idx)->getDims().ToSizeVector(),
desc.getBlockingDesc()));
}
+
+const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const {
+ return getParentEdges().size() > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive();
+}
+
REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution);
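
The deconvolution changes above let the node take weights and biases as extra input edges (1 to 3 parents) instead of only as layer blobs; because the underlying backward-data convolution primitive has no bias term, the bias is still folded in as a depthwise post-op with unit scale ("WA: we add bias as depthwise post op"). A scalar illustration of what that post-op computes (not the plugin code):

#include <cstdio>
#include <vector>

// "Bias as a depthwise post-op": after the deconvolution proper, each output
// channel c is transformed elementwise as dst = dst * scale[c] + shift[c],
// with scale[c] == 1.0f and shift[c] == bias[c].
void applyBiasAsDepthwise(std::vector<float>& dst, size_t channels, size_t spatial,
                          const std::vector<float>& bias) {
    for (size_t c = 0; c < channels; ++c)
        for (size_t s = 0; s < spatial; ++s)
            dst[c * spatial + s] = dst[c * spatial + s] * 1.0f + bias[c];
}

int main() {
    std::vector<float> dst{1, 2, 3, 4};        // 2 channels x 2 spatial elements
    applyBiasAsDepthwise(dst, 2, 2, {10, 20}); // per-channel bias
    for (float v : dst) std::printf("%g ", v); // prints: 11 12 23 24
    std::printf("\n");
    return 0;
}
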
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
index e4a5ab26e8a..c2493f45d5b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
@@ -27,11 +27,14 @@ public:
return false;
}
+ size_t descInputNumbers(MKLDNNDescriptor desc) override {
+ return static_cast<size_t>(getParentEdges().size());
+ }
+
MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override;
private:
- bool withBiases = false;
bool withGroups = false;
bool isDW = false;
size_t groupNum = 1;
@@ -40,13 +43,14 @@ private:
std::vector dilation;
std::vector paddingR;
MKLDNNDims weightsDims;
- InferenceEngine::Blob::Ptr biases;
std::vector<std::shared_ptr<mkldnn::convolution_forward::desc>> descs_fwd;
std::vector