diff --git a/.gitignore b/.gitignore index 1f7c2fbdb8f..1c5368e74d5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ build/ .gdb_history .vimspector.json doc/ +!ngraph/doc docs/build_documentation/work_dir/ inference-engine/plugins/ inference-engine/temp @@ -55,4 +56,16 @@ __pycache__ /model-optimizer/!CMakeLists.txt /model-optimizer/*.mapping /model-optimizer/*.dat -/model-optimizer/*.svg \ No newline at end of file +/model-optimizer/*.svg + +# ngraph +ngraph/src/CPackConfig.cmake +ngraph/src/CPackSourceConfig.cmake +ngraph/src/VERSION +ngraph/src/gtest/ +ngraph/src/json/ +ngraph/src/ngraphConfig.cmake +ngraph/src/ngraphConfigVersion.cmake +ngraph/src/protobuf/ +ngraph/src/src/ +ngraph/src/test/ diff --git a/cmake/developer_package.cmake b/cmake/developer_package.cmake index 1deb23ff45b..e0027fdcc53 100644 --- a/cmake/developer_package.cmake +++ b/cmake/developer_package.cmake @@ -64,12 +64,11 @@ endmacro() macro(ie_cpack) set(CPACK_GENERATOR "TGZ") + string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}") if(WIN32) set(CPACK_PACKAGE_NAME inference-engine_${CMAKE_BUILD_TYPE}) - string(REPLACE "\\" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}") else() set(CPACK_PACKAGE_NAME inference-engine) - string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}") endif() set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY OFF) set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt index 79894a74a6c..2aa19ac73bb 100644 --- a/inference-engine/CMakeLists.txt +++ b/inference-engine/CMakeLists.txt @@ -159,6 +159,17 @@ if(ENABLE_PYTHON) COMPONENT python_samples) endif() +# install speech demo files + +if(SPEECH_LIBS_AND_DEMOS) + ie_cpack_add_component(speech_demo_files REQUIRED) + + install(DIRECTORY ${TEMP}/deployment_tools + ${TEMP}/data_processing + DESTINATION . 
+ COMPONENT speech_demo_files) +endif() + # # Developer package # diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt index 519a5c64338..977260b9dff 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -57,7 +57,7 @@ add_subdirectory (src/openvino/inference_engine) # Check Cython version if("${CYTHON_VERSION}" VERSION_LESS "0.29") - message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found verson ${CYTHON_VERSION}") + message(FATAL_ERROR "OpenVINO Python API needs at least Cython version 0.29, found version ${CYTHON_VERSION}") else() message(STATUS "Found Cython version ${CYTHON_VERSION}") endif() diff --git a/inference-engine/ie_bridges/python/cmake/FindCython.cmake b/inference-engine/ie_bridges/python/cmake/FindCython.cmake index f960fe20998..5ac7c7049f7 100644 --- a/inference-engine/ie_bridges/python/cmake/FindCython.cmake +++ b/inference-engine/ie_bridges/python/cmake/FindCython.cmake @@ -58,6 +58,6 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS( Cython REQUIRED_VARS CYTHON_EXECUTABLE ) # Find Cython version execute_process(COMMAND ${CYTHON_EXECUTABLE} -V ERROR_VARIABLE CYTHON_OUTPUT OUTPUT_QUIET) -string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+\\.[0-9]+).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}") +string(REGEX REPLACE "^Cython version ([0-9]+\\.[0-9]+(\\.[0-9]+)?).*" "\\1" CYTHON_VERSION "${CYTHON_OUTPUT}") mark_as_advanced( CYTHON_EXECUTABLE CYTHON_VERSION ) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index 2ad199f8b35..aa2a30c0555 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -23,6 +23,7 @@ foreach(PYX_FILE ${OTHER_SOURCES}) get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE) set_source_files_properties(${PYX_FILE} PROPERTIES CYTHON_IS_CXX ON) cython_add_module(${PYX_NAME} ${PYX_FILE}) + add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) endforeach() diff --git a/inference-engine/ie_bridges/python/tests/test_InferRequest.py b/inference-engine/ie_bridges/python/tests/test_InferRequest.py index 0dd7ef2a33c..4de736279d5 100644 --- a/inference-engine/ie_bridges/python/tests/test_InferRequest.py +++ b/inference-engine/ie_bridges/python/tests/test_InferRequest.py @@ -3,6 +3,7 @@ import os import pytest import warnings import threading +from datetime import datetime from openvino.inference_engine import ie_api as ie from conftest import model_path, image_path @@ -195,11 +196,25 @@ def test_async_infer_wait_finish(device): def test_async_infer_wait_time(device): ie_core = ie.IECore() net = ie_core.read_network(test_net_xml, test_net_bin) - exec_net = ie_core.load_network(net, device, num_requests=1) + exec_net = ie_core.load_network(net, device, num_requests=2) img = read_image() request = exec_net.requests[0] request.async_infer({'data': img}) - request.wait(100) + start_time = datetime.utcnow() + status = request.wait(ie.WaitMode.RESULT_READY) + assert status == ie.StatusCode.OK + time_delta = datetime.utcnow() - start_time + latency_ms = (time_delta.microseconds / 1000) + (time_delta.seconds * 1000) + timeout = max(100, 
latency_ms) + request = exec_net.requests[1] + request.async_infer({'data': img}) + max_repeat = 10 + status = ie.StatusCode.REQUEST_BUSY + i = 0 + while i < max_repeat and status != ie.StatusCode.OK: + status = request.wait(timeout) + i += 1 + assert status == ie.StatusCode.OK res = request.output_blobs['fc_out'].buffer assert np.argmax(res) == 2 del exec_net diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp index 9affebbd8bf..7ab2469e45c 100644 --- a/inference-engine/samples/benchmark_app/benchmark_app.hpp +++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp @@ -100,6 +100,9 @@ static const char dump_config_message[] = "Optional. Path to XML/YAML/JSON file static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\"" " in case of one input size."; +// @brief message for quantization bits +static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)"; + /// @brief Define flag for showing help message
DEFINE_bool(h, false, help_message); @@ -184,6 +187,9 @@ DEFINE_string(dump_config, "", dump_config_message); /// @brief Define flag for input shape
DEFINE_string(shape, "", shape_message); +/// @brief Define flag for quantization bits (default 16) +DEFINE_int32(qb, 16, gna_qb_message); + /** * @brief This function show a help message */ @@ -221,4 +227,5 @@ static void showUsage() { std::cout << " -dump_config " << dump_config_message << std::endl; std::cout << " -load_config " << load_config_message << std::endl; #endif + std::cout << " -qb " << gna_qb_message << std::endl; } diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 684753b8b55..87da8c5d224 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -274,6 +275,14 @@ int main(int argc, char *argv[]) { } } else if (device == "MYRIAD") { device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING); + } else if (device == "GNA") { + if (FLAGS_qb == 8) + device_config[GNA_CONFIG_KEY(PRECISION)] = "I8"; + else + device_config[GNA_CONFIG_KEY(PRECISION)] = "I16"; + + if (isFlagSetInCommandLine("nthreads")) + device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads); } } diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 94079aebdf3..80254dca3c0 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -24,6 +24,7 @@ #include "details/caseless.hpp" #include
#include +#include #include #include #include @@ -73,7 +74,8 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneNetwork(const InferenceEngin std::shared_ptr clonedNetwork(nullptr); if (network.getFunction()) { const auto transformations_callback = [](const std::shared_ptr &node) -> bool { - return std::dynamic_pointer_cast(node) != nullptr; + return std::dynamic_pointer_cast(node) || + std::dynamic_pointer_cast(node); }; CNNNetwork net(network.getFunction()); auto nGraphFunc = net.getFunction(); diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp index ae54fa67796..8a25d74d0ca 100644 --- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp +++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp @@ -17,6 +17,8 @@ #include "blob_factory.hpp" #include "precision_ex.hpp" #include "layers/gna_layer_info.hpp" +#include "weights_converter.hpp" +#include "layer_transform.hpp" namespace GNAPluginNS { namespace frontend { @@ -137,6 +139,48 @@ class Quant { } }; +template +inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) { + auto prec_blob = InferenceEngine::make_shared_blob({ precision, + fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() }); + prec_blob->allocate(); + + int i = 0; + for (auto& precValue : *prec_blob) { + auto f32Value = fp32_blob->buffer().template as::value_type*>()[i++] * scale_factor; + if (f32Value > std::numeric_limits::max()) { + precValue = std::numeric_limits::max(); + } else if (f32Value < std::numeric_limits::min()) { + precValue = std::numeric_limits::min(); + } else { + precValue = static_cast(f32Value); + } + } + + return static_cast(prec_blob); +} + +inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) { + InferenceEngine::Blob::Ptr result_ptr = nullptr; + switch (precision) { + case InferenceEngine::Precision::FP32: + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + break; + case InferenceEngine::Precision::I32: + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + break; + case InferenceEngine::Precision::I16: + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + break; + case InferenceEngine::Precision::I8: + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + break; + default: + THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported"; + } + return result_ptr; +} + template inline void quantizeWeightsBiases(const QuantDesc & quantDesc, InferenceEngine::WeightableLayer *wl, @@ -389,6 +433,18 @@ class DataQuantizer : public DataQuantizerBas } cnnLayer->precision = Desc::mandatory().getInputPrecision(); + if (cnnLayer->type == "Const") { + if (cnnLayer->blobs["custom"]->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) { + cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]); + } + auto const_scale_factor = InferenceEngine::getInjectedData(*cnnLayer)->_dst_quant.scale; + auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]); + auto const_blob = cnnLayer->blobs["custom"]; + if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { + cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), 
const_scale_factor); + } + } + return true; } }; diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index 9e9ebc10aaf..ba221f68ad3 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -197,6 +197,36 @@ class ScaleFactorPerLayer { return true; } + if (cnnLayer->type == "Const") { + auto blob = cnnLayer->blobs["custom"]; + if (blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) { + blob = make_fp32_blob(blob); + } + auto max_val = std::numeric_limits::min(); + auto min_val = std::numeric_limits::max(); + + auto flt_buf = blob->buffer().as(); + auto size = blob->size(); + + for (int i=0; i < size; i++) { + auto val = flt_buf[i]; + if (val > max_val) max_val = val; + if (val < min_val) min_val = val; + } + + auto abs_val = std::max(std::abs(max_val), std::abs(min_val)); + auto scale_val = static_cast(std::numeric_limits::max()) / abs_val; + + // TODO: Investigate what should be the scale in such cases (31910) + if (std::isinf(scale_val)) { + quant->_dst_quant.scale = quant->_src_quant.scale; + } else { + quant->_dst_quant.scale = scale_val; + } + + return ScaleFactorUpdateResult(); + } + if (!CNNNetHasPrevLayer(cnnLayer)) { quant->_dst_quant.scale = quant->_src_quant.scale; return ScaleFactorUpdateResult(); @@ -231,6 +261,7 @@ class ScaleFactorPerLayer { auto quantParams0 = InferenceEngine::getInjectedData(in0); auto quantParams1 = InferenceEngine::getInjectedData(in1); + auto quantData = InferenceEngine::getInjectedData(*eltwiseLayer); switch (eltwiseLayer->_operation) { @@ -239,6 +270,7 @@ class ScaleFactorPerLayer { quantData->_dst_quant.scale = quantParams0->_dst_quant.scale * quantParams1->_dst_quant.scale; break; } + case InferenceEngine::EltwiseLayer::Sub: case InferenceEngine::EltwiseLayer::Sum: { // detect which input will be used as biases if (LayerInfo(in0).has32BOutput()) { @@ -247,6 +279,7 @@ class ScaleFactorPerLayer { } // this path might result in significant data loss + quantData->_bias_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale; quantData->_weights_quant.scale = quantParams1->_dst_quant.scale / quantParams0->_dst_quant.scale; quantData->_dst_quant.scale = quantParams1->_dst_quant.scale; diff --git a/inference-engine/src/gna_plugin/frontend/weights_converter.hpp b/inference-engine/src/gna_plugin/frontend/weights_converter.hpp index 549b8ef9e98..040f7bb11f6 100644 --- a/inference-engine/src/gna_plugin/frontend/weights_converter.hpp +++ b/inference-engine/src/gna_plugin/frontend/weights_converter.hpp @@ -7,22 +7,28 @@ #include "quantized_layer_params.hpp" #include "precision_utils.h" +inline InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob) { + auto fp32_blob = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, + fp16_blob->getTensorDesc().getDims(), fp16_blob->getTensorDesc().getLayout() }); + fp32_blob->allocate(); + + int i = 0; + for (auto& f32Value : *fp32_blob) { + auto f16Value = fp16_blob->buffer().template as::value_type*>()[i++]; + f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value); + } + + return static_cast(fp32_blob); +} + inline void fp16_to_fp32(InferenceEngine::WeightableLayer *lp) { InferenceEngine::BlobMap newBlobs; for (auto& blob : lp->blobs) { if (blob.second->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP16) { THROW_GNA_EXCEPTION << 
"Unsupported precision. Layer: " << lp->name << " , Blob: " << blob.first; } - auto tmp = - InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, - blob.second->getTensorDesc().getDims(), InferenceEngine::Layout::C }); - tmp->allocate(); - int i = 0; - for (auto& f32Value : *tmp) { - auto f16Value = blob.second->buffer().template as::value_type*>()[i++]; - f32Value = InferenceEngine::PrecisionUtils::f16tof32(f16Value); - } - newBlobs[blob.first] = tmp; + auto fp32_blob = make_fp32_blob(blob.second); + newBlobs[blob.first] = fp32_blob; } lp->_biases = newBlobs["biases"]; lp->_weights = newBlobs["weights"]; @@ -44,6 +50,18 @@ inline bool convertWeights(InferenceEngine::CNNLayer* lp) { for (auto& dataItem : lp->outData) { dataItem->setPrecision(InferenceEngine::Precision::FP32); } + InferenceEngine::BlobMap newBlobs; + for (auto& blob_pair : lp->blobs) { + auto blob_name = blob_pair.first; + auto blob_ptr = blob_pair.second; + if (blob_ptr->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP16) { + auto new_blob = make_fp32_blob(blob_ptr); + newBlobs[blob_name] = new_blob; + } else { + newBlobs[blob_name] = blob_ptr; + } + } + return true; } diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 60ff272b6fb..46f41199ee9 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -185,17 +185,16 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) if (constLayer->blobs.find("custom") == constLayer->blobs.end()) { THROW_GNA_EXCEPTION << "const layer: " << constLayer->name << "doesn't have custom in blobs section"; } - auto constBlob = constLayer->blobs["custom"]; + auto const_blob = constLayer->blobs["custom"]; - void* ptr_for_const_blob = &ptr_for_const_blob; - connectOutput(constLayer, ptr_for_const_blob, constBlob->size()); - - const_connections[constLayer->name] = ptr_for_const_blob; + const_connections[constLayer->name] = &const_connections[constLayer->name]; + void* ptr_for_const_blob = &const_connections[constLayer->name]; + connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize()); // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests // dont see practical use case when bind storage type need to be different that allocation type - gnamem->readonly().bind_initializer(ptr_for_const_blob, [constBlob](void* data, size_t size) { - ie_memcpy(data, size, constBlob->buffer(), constBlob->byteSize()); + gnamem->readonly().bind_initializer(ptr_for_const_blob, [const_blob](void* data, size_t size) { + ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize()); }); } @@ -602,15 +601,35 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { if (cropLayer == nullptr) { return; } - if (cropLayer->axis.size() > 1) { + + IE_ASSERT(!layer->insData.empty()); + auto inputs = layer->insData.begin()->lock(); + + IE_ASSERT(!cropLayer->axis.empty()); + IE_ASSERT(cropLayer->axis.size() == cropLayer->dim.size()); + IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size()); + + std::vector axis, dim, offset; + for (int n = 0; n < cropLayer->axis.size(); n++) { + uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]); + // Exclude crop layer components that do nothing + if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) { + continue; + } + 
axis.push_back(cropLayer->axis[n]); + dim.push_back(cropLayer->dim[n]); + offset.push_back(cropLayer->offset[n]); + } + + if (axis.size() > 1) { THROW_GNA_EXCEPTION << - "Crop layer does not support the number of cropped dimensions = " - << cropLayer->axis.size() << "."; + "Crop layer does not support more than 1 non-trivial cropped dimension, provided: " + << axis.size() << "."; } auto quantized = InferenceEngine::getInjectedData(layer); - size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size(); - size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size(); + size_t cropOffset = offset.front() * cropLayer->precision.size(); + size_t cropOutputSize = dim.front() * cropLayer->precision.size(); if (ALIGN64(cropOffset) == cropOffset) { // leave crop as it is @@ -637,20 +656,18 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { } else { gnalog() << "Crop " << layer->name << " is being replaced by Affine layer...\n"; IE_ASSERT(!layer->outData.empty()); - IE_ASSERT(!layer->insData.empty()); auto outputs = *layer->outData.begin(); - auto inputs = layer->insData.begin()->lock(); // only 1D crops supported - if (cropLayer->axis.size() != 1) { + if (axis.size() != 1) { THROW_GNA_EXCEPTION << "only 1D crop layer supported: " << cropLayer->name; } // TODO: add unit tests for 4d crops blobs - uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[0]); + uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front()); uint32_t num_columns_in = 1; - uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - cropLayer->axis[0]); + uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front()); uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in; void* ptr_inputs = nullptr; @@ -686,7 +703,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectOutput(layer, ptr_outputs, num_data_bytes_out); - FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true); + FillWeightOfAligningFilter(layer, ptr_weights, offset.front(), (quantized == nullptr) ? false : true); (quantized == nullptr) ? 
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64) : @@ -713,17 +730,27 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { int biasesLayerIdx = 1; if (quantized) { - if (eltwise._operation == EltwiseLayer::Sum) { + switch (eltwise._operation) { + case InferenceEngine::EltwiseLayer::Sum: + case InferenceEngine::EltwiseLayer::Sub: + { if (inputs4Bytes->getPrecision().size() != 4) { std::swap(inputs4Bytes, inputs2Bytes); biasesLayerIdx = 0; } GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2); GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 4); - } else { + break; + } + case InferenceEngine::EltwiseLayer::Prod: + { // for mul both inputs should be 2 bytes precision GNA_LAYER_ASSERT(layer, inputs2Bytes->getPrecision().size() == 2); GNA_LAYER_ASSERT(layer, inputs4Bytes->getPrecision().size() == 2); + break; + } + default: + THROW_GNA_EXCEPTION << "Unsupported eltwise operation for quantization: " << eltwise._operation; } } @@ -767,6 +794,18 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx); switch (eltwise._operation) { + case EltwiseLayer::Sub: + if (quantized == nullptr) { + gnamem->readonly().push_value(ptr_weights, -1.0f, num_rows_out, 64); + } else { + auto scaledIdentity = -quantized->_weights_quant.scale; + + auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); + + gnamem->readonly().push_value(ptr_weights, quantizedIdentity, num_rows_out, 64); + } + connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); + break; case EltwiseLayer::Sum: if (quantized == nullptr) { gnamem->readonly().push_value(ptr_weights, 1.0f, num_rows_out, 64); diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp index 216a2180c8e..8a7613584c8 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp @@ -248,8 +248,6 @@ void Config::AdjustKeyMapValues() { key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] = gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO; key_config_map[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name(); - key_config_map[CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS)] = - gnaFlags.exclusive_async_requests ? PluginConfigParams::YES: PluginConfigParams::NO; key_config_map[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] = gnaFlags.uniformPwlDesign ? 
PluginConfigParams::YES: PluginConfigParams::NO; key_config_map[CONFIG_KEY(PERF_COUNT)] = diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp index 8b8f5591614..4cc135e1087 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp @@ -153,6 +153,15 @@ class LayerInfo { return dynamic_cast(layer)->_operation == InferenceEngine::EltwiseLayer::Sum; } + bool isEltwiseSub() const noexcept { + IS_VALID(); + if (!isEltwise()) return false; + // dynamic_cast(layer) is validated in isEltwise function + // coverity[var_deref_op] + return dynamic_cast(layer)->_operation == + InferenceEngine::EltwiseLayer::Sub; + } + bool isEltwiseMul() const noexcept { IS_VALID(); if (!isEltwise()) return false; diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 0c4b9f12961..b74d67710db 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -150,6 +150,7 @@ static std::vector getCandidatesForIdentityInsertion(const CNNLayer auto prev1 = PrevFunctionalLayer(l, 1); switch (eltwise->_operation) { + case EltwiseLayer::Sub: case EltwiseLayer::Sum: if (!LayerInfo(prev0).has32BOutput() || !LayerInfo(prev1).has32BOutput()) { return prevLayers; @@ -227,7 +228,7 @@ void InsertDiagonalLayerPass::run() { // for e mul if we have 2-4 - inputs we need to insert identity to put 4 bytes input into weights // for e mul if we have 4-4 - inputs we need to insert 2 identities to put both 4 bytes input into weights - if (eltwise->_operation != EltwiseLayer::Sum) + if (eltwise->_operation != EltwiseLayer::Sum && eltwise->_operation != EltwiseLayer::Sub) continue; auto prevLayer1 = CNNNetPrevLayerSkipCertain(l, 1, [](CNNLayerPtr ptr) { diff --git a/inference-engine/src/ir_readers/ie_ir_parser.cpp b/inference-engine/src/ir_readers/ie_ir_parser.cpp index 27222c95d65..7f1047f4f9f 100644 --- a/inference-engine/src/ir_readers/ie_ir_parser.cpp +++ b/inference-engine/src/ir_readers/ie_ir_parser.cpp @@ -392,7 +392,7 @@ std::shared_ptr V10Parser::createNode(const std::vector V10Parser::LayerCreator::cre std::vector activations_beta = getParameters(dn, "activations_beta", {}); float clip = GetFloatAttr(dn, "clip", 0.f); return std::make_shared(inputs[0], inputs[1], inputs[2], inputs[3], inputs[4], inputs[5], - GetUIntAttr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO, + GetUInt64Attr(dn, "hidden_size"), ngraph::op::LSTMWeightsFormat::IFCO, activations, activations_alpha, activations_beta, clip); } @@ -1365,8 +1365,8 @@ std::shared_ptr V10Parser::LayerCreator::cre if (dn.empty()) THROW_IE_EXCEPTION << "Cannot read parameter for " << getType() << " layer with name: " << layerParsePrms.name; - size_t offset = GetUIntAttr(dn, "offset"); - size_t size = GetUIntAttr(dn, "size"); + size_t offset = GetUInt64Attr(dn, "offset"); + size_t size = GetUInt64Attr(dn, "size"); if (!weights || weights->cbuffer() == nullptr) THROW_IE_EXCEPTION << "Cannot read network! The model requires weights data! 
" diff --git a/inference-engine/src/ir_readers/ie_ir_parser.hpp b/inference-engine/src/ir_readers/ie_ir_parser.hpp index 4b2d96e1dde..17b331cb3fb 100644 --- a/inference-engine/src/ir_readers/ie_ir_parser.hpp +++ b/inference-engine/src/ir_readers/ie_ir_parser.hpp @@ -166,12 +166,12 @@ private: class XmlDeserializer : public ngraph::AttributeVisitor { public: explicit XmlDeserializer(const pugi::xml_node& node): node(node) {} - void on_attribute(const std::string& name, std::string& value) override { + void on_adapter(const std::string& name, ngraph::ValueAccessor& value) override { std::string val; if (!getStrAttribute(node.child("data"), name, val)) return; - value = val; + value.set(val); } - void on_attribute(const std::string& name, bool& value) override { + void on_adapter(const std::string& name, ngraph::ValueAccessor& value) override { std::string val; if (!getStrAttribute(node.child("data"), name, val)) return; std::transform(val.begin(), val.end(), val.begin(), [](char ch) { @@ -184,7 +184,7 @@ private: bool is_false = false_names.find(val) != false_names.end(); if (!is_true && !is_false) return; - value = is_true; + value.set(is_true); } void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { std::string val; diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index d1eef3df1f0..1c6ed0f3227 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -63,12 +63,8 @@ public: CNNLayerPtr create(); - void on_attribute(const std::string& name, std::string& value) override { - params[name] = value; - } - - void on_attribute(const std::string& name, bool& value) override { - params[name] = value ? "true" : "false"; + void on_adapter(const std::string& name, ::ngraph::ValueAccessor &value) override { + params[name] = value.get() ? "true" : "false"; } void addSpecificCreator(const std::vector& forTypes, const CreatorFor& creator) { @@ -417,6 +413,15 @@ InferenceEngine::details::CNNLayerCreator::CNNLayerCreator(const std::shared_ptr res->params = params; return res; }); + + addSpecificCreator({"StaticShapeTopK"}, [](const std::shared_ptr<::ngraph::Node>& node, + const std::map params) -> CNNLayerPtr { + LayerParams attrs = {node->get_friendly_name(), "TopK", + details::convertPrecision(node->get_output_element_type(0))}; + auto res = std::make_shared(attrs); + res->params = params; + return res; + }); } CNNLayerPtr InferenceEngine::details::CNNLayerCreator::create() { @@ -530,7 +535,6 @@ std::shared_ptr convertFunctionToICNNNetwork(const std::shared_p std::make_shared>(), std::make_shared>(), std::make_shared>(), - std::make_shared>(), std::make_shared>(), std::make_shared>(), std::make_shared>(), diff --git a/inference-engine/src/legacy_api/src/graph_transformer.cpp b/inference-engine/src/legacy_api/src/graph_transformer.cpp index cb5afbff4e2..180c63c0eda 100644 --- a/inference-engine/src/legacy_api/src/graph_transformer.cpp +++ b/inference-engine/src/legacy_api/src/graph_transformer.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "blob_factory.hpp" @@ -228,6 +229,12 @@ std::vector ConstTransformer::foldConstSubgraphsInternal(const std: return remainingConstLayers; } +static std::vector skipConstInfer = { + "FakeQuantize", + "Quantize", + "CumSum" // Const inference function for CumSum is not implemented! 
+}; + const std::map ConstTransformer::getConstLayers(const std::vector& sortedLayers) { std::map mapConstLayers; // collect all const layers, which inputs are const layers. @@ -235,7 +242,7 @@ const std::map ConstTransformer::getConstLayers(const std::ve // Layers with "Shape" and "Const" type are Const by definition if (layer->type == "Shape" || layer->type == "Const") { mapConstLayers[layer->name] = false; - } else if ((layer->type != "FakeQuantize") && (layer->type != "Quantize") && (!isForFakeQuantzie(*layer))) { + } else if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end() && !isForFakeQuantzie(*layer)) { bool isAllInputsConst = true; for (auto const& data : layer->insData) { auto creator = data.lock()->getCreatorLayer().lock(); @@ -336,7 +343,7 @@ const BlobMap ConstTransformer::getConstData(const std::map& }; for (const auto& layer : sortedLayers) { - if (layer->type == "FakeQuantize" || layer->type == "Quantize") { + if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) != skipConstInfer.end()) { continue; } @@ -346,13 +353,13 @@ const BlobMap ConstTransformer::getConstData(const std::map& auto implPtr = holder.getConstInferImpl(layer->type); if (!implPtr && !isForShape) - if (layer->type != "FakeQuantize" && layer->type != "Quantize") + if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end()) THROW_IE_EXCEPTION << "Failed to find reference implementation for `" + layer->name + "` Layer with `" + layer->type + "` Type on constant propagation"; if (!isForShape) { auto outputBlobs = getOutputBlobs(layer->outData); auto inp = getInputBlobs(layer->insData, isForShape); - if (layer->type != "FakeQuantize" && layer->type != "Quantize") + if (std::find(skipConstInfer.begin(), skipConstInfer.end(), layer->type) == skipConstInfer.end()) implPtr->infer(inp, layer->params, layer->blobs, outputBlobs); for (int i = 0; i < layer->outData.size(); i++) { std::string dataName = layer->outData[i]->getName(); diff --git a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp index 290dc13e452..0bf0115d1db 100644 --- a/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp +++ b/inference-engine/src/legacy_api/src/ie_cnn_layer_builder_ngraph.cpp @@ -1381,24 +1381,6 @@ CNNLayer::Ptr NodeConverter::createLayer(const st return res; } -template <> -CNNLayer::Ptr NodeConverter::createLayer(const std::shared_ptr& layer) const { - LayerParams params = {layer->get_friendly_name(), "Select", details::convertPrecision(layer->get_output_element_type(0))}; - - auto res = std::make_shared(params); - auto castedLayer = ngraph::as_type_ptr(layer); - if (castedLayer == nullptr) THROW_IE_EXCEPTION << "Cannot get " << params.type << " layer " << params.name; - - auto broadcast = castedLayer->get_auto_broadcast().m_type; - if (broadcast == ngraph::op::AutoBroadcastType::NUMPY) { - res->params["auto_broadcast"] = "numpy"; - } else if (broadcast == ngraph::op::AutoBroadcastType::NONE) { - res->params["auto_broadcast"] = "none"; - } - - return res; -} - template <> CNNLayer::Ptr NodeConverter::createLayer( const std::shared_ptr& layer) const { diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index 2bb9b37dfbd..2e7df2f0bdc 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -4,6 +4,10 @@ 
set(TARGET_NAME "MKLDNNPlugin") +if(ENABLE_LTO) + ie_enable_lto() +endif() + if (WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX") endif() @@ -41,6 +45,7 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_resample_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp @@ -93,6 +98,7 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp ) foreach(LAYER ${LAYERS}) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp index 5095ea31d3a..12c9644b4cb 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp @@ -51,7 +51,7 @@ struct primitive_desc_iterator : public handle memory::primitive_desc fetch() const { memory::primitive_desc adesc; - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; cdesc = mkldnn_primitive_desc_iterator_fetch(get()); @@ -72,7 +72,7 @@ struct primitive_desc_iterator : public handle memory::primitive_desc src_primitive_desc(size_t index = 0) const { memory::primitive_desc adesc; memory::primitive_desc cdesc_elem; - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get())); const_mkldnn_primitive_desc_t const_cdesc = mkldnn_primitive_desc_query_pd(cdesc_elem.get(), @@ -86,7 +86,7 @@ struct primitive_desc_iterator : public handle memory::primitive_desc dst_primitive_desc(size_t index = 0) const { memory::primitive_desc adesc; memory::primitive_desc cdesc_elem; - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get())); const_mkldnn_primitive_desc_t const_cdesc = mkldnn_primitive_desc_query_pd(cdesc_elem.get(), @@ -101,7 +101,7 @@ struct primitive_desc_iterator : public handle memory::primitive_desc diff_src_primitive_desc(size_t index = 0) const { memory::primitive_desc adesc; memory::primitive_desc cdesc_elem; - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get())); const_mkldnn_primitive_desc_t const_cdesc = mkldnn_primitive_desc_query_pd(cdesc_elem.get(), @@ -115,7 +115,7 @@ struct primitive_desc_iterator : public handle memory::primitive_desc weights_primitive_desc(size_t index = 0) const { memory::primitive_desc adesc; memory::primitive_desc cdesc_elem; - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get())); const_mkldnn_primitive_desc_t const_cdesc = mkldnn_primitive_desc_query_pd(cdesc_elem.get(), @@ -129,7 +129,7 @@ struct primitive_desc_iterator : public handle memory::primitive_desc diff_dst_primitive_desc(size_t index = 0) const { memory::primitive_desc adesc; memory::primitive_desc cdesc_elem; - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; cdesc_elem.reset(mkldnn_primitive_desc_iterator_fetch(get())); const_mkldnn_primitive_desc_t const_cdesc = mkldnn_primitive_desc_query_pd(cdesc_elem.get(), @@ -152,7 +152,7 @@ struct primitive_desc_iterator : public handle template void 
getPrimitiveDescriptor(T& pdesc) const { - mkldnn_primitive_desc_t cdesc; + mkldnn_primitive_desc_t cdesc = nullptr; memory::primitive_desc cdescpd; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index 43c0b93995f..209bcc44d61 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -151,7 +151,9 @@ void MKLDNNEdge::allocate(const void* mem_ptr) { auto inputDesc = getInputDesc(); auto outputDesc = getOutputDesc(); if (!MKLDNNExtensionUtils::initTensorsAreEqual(outputDesc, inputDesc) || - (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && inputDesc != outputDesc)) + (inputDesc.getDims().size() > 0 && inputDesc.getDims()[0] != 1 && + (inputDesc.getPrecision() != outputDesc.getPrecision() || + inputDesc.getBlockingDesc() != outputDesc.getBlockingDesc()))) THROW_IE_EXCEPTION << "Cannot allocate memory. Nodes have primitive descriptors with different formats."; if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) THROW_IE_EXCEPTION << "Cannot get input descriptor!"; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp deleted file mode 100644 index c71790729ce..00000000000 --- a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "mkldnn_layers_dispatcher.hpp" -#include
-#include "nodes/list.hpp" -#include - -using namespace InferenceEngine; - -namespace MKLDNNPlugin { - -void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr) { - if (!mngr) - THROW_IE_EXCEPTION << "Cannot add default extensions! Extension manager is empty."; - - auto defaultExtensions = std::make_shared(); - mngr->AddExtension(defaultExtensions); -} - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp b/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp deleted file mode 100644 index 4c7e5b6cb67..00000000000 --- a/inference-engine/src/mkldnn_plugin/mkldnn_layers_dispatcher.hpp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "mkldnn_extension_mngr.h" - -namespace MKLDNNPlugin { - -void addDefaultExtensions(MKLDNNExtensionManager::Ptr mngr); - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index ad1bc551220..1ea2ecd2c0f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -23,11 +23,14 @@ MKLDNNMemory::MKLDNNMemory(const engine& eng) : eng(eng) {} size_t MKLDNNMemory::GetSize() const { uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType())); + return GetElementsCount() * itemSize; +} +size_t MKLDNNMemory::GetElementsCount() const { auto desc = GetDescriptor(); std::vector dims(desc.data.layout_desc.blocking.padding_dims, desc.data.layout_desc.blocking.padding_dims + desc.data.ndims); - return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()) * itemSize; + return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()); } void MKLDNNMemory::Create(memory::dims dims, memory::data_type data_type, memory::format format, const void* data) { @@ -182,6 +185,7 @@ bool MKLDNNMemory::isConsistant(memory::dims dims, memory::format format) { case f::OhIw16o4i: case f::OIhw4i16o4i: case f::OhIw8o4i: + case f::IOhw16o16i: ndims = 4; break; // DHW case f::ncdhw: @@ -411,6 +415,7 @@ std::string MKLDNNMemory::formatToString(memory::format fmt) { case memory::OhIw8o4i: return "OhIw8o4i"; case memory::OhIw16o4i: return "OhIw16o4i"; case memory::OIhw4i16o4i: return "OIhw4i16o4i"; + case memory::IOhw16o16i: return "IOhw16o16i"; case memory::oidhw: return "oidhw"; case memory::dhwio: return "dhwio"; @@ -718,6 +723,33 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { blkDims.push_back(16); layout = Layout::BLOCKED; break; + case memory::OIhw8o8i: + order = {0, 1, 2, 3, 0, 1}; + blkDims = dims; + blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0); + blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0); + blkDims.push_back(8); + blkDims.push_back(8); + layout = Layout::BLOCKED; + break; + case memory::OIhw16o16i: + order = {0, 1, 2, 3, 0, 1}; + blkDims = dims; + blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0); + blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0); + blkDims.push_back(16); + blkDims.push_back(16); + layout = Layout::BLOCKED; + break; + case memory::IOhw16o16i: + order = {1, 0, 2, 3, 0, 1}; + blkDims = dims; + blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0); + blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 
1 : 0); + blkDims.push_back(16); + blkDims.push_back(16); + layout = Layout::BLOCKED; + break; case memory::OIdhw8i8o: order = {0, 1, 2, 3, 4, 1, 0}; blkDims = dims; @@ -736,8 +768,26 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { blkDims.push_back(16); layout = Layout::BLOCKED; break; + case memory::OIdhw8o8i: + order = {0, 1, 2, 3, 4, 1, 0}; + blkDims = dims; + blkDims[0] = blkDims[0] / 8 + (blkDims[0] % 8 ? 1 : 0); + blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0); + blkDims.push_back(8); + blkDims.push_back(8); + layout = Layout::BLOCKED; + break; + case memory::OIdhw16o16i: + order = {0, 1, 2, 3, 4, 0, 1}; + blkDims = dims; + blkDims[0] = blkDims[0] / 16 + (blkDims[0] % 16 ? 1 : 0); + blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0); + blkDims.push_back(16); + blkDims.push_back(16); + layout = Layout::BLOCKED; + break; case memory::gOIhw4o4i: - order = {0, 1, 2, 3, 4, 2, 1}; + order = {0, 1, 2, 3, 4, 1, 2}; blkDims = dims; blkDims[1] = blkDims[1] / 4 + (blkDims[1] % 4 ? 1 : 0); blkDims[2] = blkDims[2] / 4 + (blkDims[2] % 4 ? 1 : 0); @@ -754,6 +804,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { blkDims.push_back(8); layout = Layout::BLOCKED; break; + case memory::gOIhw8o8i: + order = {0, 1, 2, 3, 4, 1, 2}; + blkDims = dims; + blkDims[1] = blkDims[1] / 8 + (blkDims[1] % 8 ? 1 : 0); + blkDims[2] = blkDims[2] / 8 + (blkDims[2] % 8 ? 1 : 0); + blkDims.push_back(8); + blkDims.push_back(8); + layout = Layout::BLOCKED; + break; case memory::gOIhw16i16o: order = {0, 1, 2, 3, 4, 2, 1}; blkDims = dims; @@ -763,6 +822,15 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { blkDims.push_back(16); layout = Layout::BLOCKED; break; + case memory::gOIhw16o16i: + order = {0, 1, 2, 3, 4, 1, 2}; + blkDims = dims; + blkDims[1] = blkDims[1] / 16 + (blkDims[1] % 16 ? 1 : 0); + blkDims[2] = blkDims[2] / 16 + (blkDims[2] % 16 ? 
1 : 0); + blkDims.push_back(16); + blkDims.push_back(16); + layout = Layout::BLOCKED; + break; case memory::OhIw8o4i: order = {0, 2, 1, 3, 0, 1}; blkDims = dims; @@ -1067,6 +1135,16 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): } else if (blkdDims[4] == 16 && blkdDims[5] == 16) { mkldnnFormat = memory::format::OIhw16i16o; } + } else if (order.size() == 6 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) { + if (blkdDims[4] == 8 && blkdDims[5] == 8) { + mkldnnFormat = memory::format::OIhw8o8i; + } else if (blkdDims[4] == 16 && blkdDims[5] == 16) { + mkldnnFormat = memory::format::OIhw16o16i; + } + } else if (order.size() == 6 && order[0] == 1 && order[1] == 0 && order[2] == 2 && order[3] == 3 && order[4] == 0 && order[5] == 1) { + if (blkdDims[4] == 16 && blkdDims[5] == 16) { + mkldnnFormat = memory::format::IOhw16o16i; + } } else if (order.size() == 5 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 0) { if (blkdDims[4] == 8) { mkldnnFormat = memory::format::Ohwi8o; @@ -1122,6 +1200,13 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): } else if (blkdDims[6] == 16) { mkldnnFormat = memory::format::OIdhw16i16o; } + } else if (order.size() == 7 && + order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 0 && order[6] == 1) { + if (blkdDims[6] == 8) { + mkldnnFormat = memory::format::OIdhw8o8i; + } else if (blkdDims[6] == 16) { + mkldnnFormat = memory::format::OIdhw16o16i; + } } else if (order.size() == 7 && order[0] == 0 && order[1] == 2 && order[2] == 3 && order[3] == 1 && order[4] == 4 && order[5] == 0 && order[6] == 1) { if (blkdDims[5] == 8) { @@ -1136,12 +1221,21 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): } else if (order.size() == 7 && order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 2 && order[6] == 1) { if (blkdDims[6] == 4) { - mkldnnFormat = memory::format::gOIhw4o4i; + mkldnnFormat = memory::format::gOIhw4i4o; } else if (blkdDims[6] == 8) { mkldnnFormat = memory::format::gOIhw8i8o; } else if (blkdDims[6] == 16) { mkldnnFormat = memory::format::gOIhw16i16o; } + } else if (order.size() == 7 && + order[0] == 0 && order[1] == 1 && order[2] == 2 && order[3] == 3 && order[4] == 4 && order[5] == 1 && order[6] == 2) { + if (blkdDims[6] == 4) { + mkldnnFormat = memory::format::gOIhw4o4i; + } else if (blkdDims[6] == 8) { + mkldnnFormat = memory::format::gOIhw8o8i; + } else if (blkdDims[6] == 16) { + mkldnnFormat = memory::format::gOIhw16o16i; + } } else if (order.size() == 7 && order[0] == 0 && order[1] == 1 && order[2] == 3 && order[3] == 2 && order[4] == 4 && order[5] == 1 && order[6] == 2) { if (blkdDims[5] == 8 && blkdDims[6] == 4) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index 63b46c9e531..4b0d024d223 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -87,6 +87,7 @@ public: } size_t GetSize() const; + size_t GetElementsCount() const; mkldnn::memory::format GetFormat() const { return static_cast(prim->get_primitive_desc().desc().data.format); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index e40475fa40d..e5afa640662 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ 
-1017,12 +1017,17 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() { auto config = selected_pd->getConfig(); if (!isInitConfig(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getConfiguredInputDesc(config, i); + // TensorDescriptor constructor which is called inside getConfiguredInputDesc incorrectly computes offset field. + // That's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. + config.inConfs[i].desc = MKLDNNMemoryDesc(getConfiguredInputDesc(config, i)); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getConfiguredOutputDesc(config, i); + // TensorDescriptor constructor which is called inside getConfiguredOutputDesc incorrectly computes offset field. + // That's why MKLDNNMemoryDesc routine is used to reinitialize TD with expected offset values. + config.outConfs[i].desc = MKLDNNMemoryDesc(getConfiguredOutputDesc(config, i)); } + initDescriptor(config); } else if (getType() != RNNSeq && getType() != RNNCell) { initDescriptor(config); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 46c3d26fe02..02c5083863c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -370,7 +370,10 @@ public: if (srcDescs.empty() || selectedDescs.empty()) return false; for (size_t i = 0; i < srcDescs.size() && i < selectedDescs.size(); i++) { - if (srcDescs[i] != selectedDescs[i].desc && srcDescs[i].getLayout() != InferenceEngine::Layout::ANY) + if (!(srcDescs[i].getBlockingDesc() == selectedDescs[i].desc.getBlockingDesc() && + srcDescs[i].getPrecision() == selectedDescs[i].desc.getPrecision() && + srcDescs[i].getDims() == selectedDescs[i].desc.getDims()) && + srcDescs[i].getLayout() != InferenceEngine::Layout::ANY) return false; } return true; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index f626c75f0bf..95c7a38a59c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -5,7 +5,6 @@ #include "ie_metric_helpers.hpp" #include "mkldnn_plugin.h" #include "mkldnn_extension_mngr.h" -#include "mkldnn_layers_dispatcher.hpp" #include "mkldnn_weights_cache.hpp" #include #include @@ -15,6 +14,7 @@ #include #include #include +#include #include "convert_function_to_cnn_network.hpp" #include @@ -23,6 +23,7 @@ #include #include #include +#include #include #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) @@ -40,7 +41,7 @@ using namespace InferenceEngine; Engine::Engine() { _pluginName = "CPU"; - addDefaultExtensions(extensionManager); + extensionManager->AddExtension(std::make_shared()); } Engine::~Engine() { @@ -83,7 +84,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network, const st const auto transformations_callback = [](const std::shared_ptr &node) -> bool { return std::dynamic_pointer_cast(node) || std::dynamic_pointer_cast(node) || - std::dynamic_pointer_cast(node); + std::dynamic_pointer_cast(node) || + std::dynamic_pointer_cast(node); }; auto nGraphFunc = clonedNetwork->getFunction(); // Disable shape inference (WA for generic operations) diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp index c7134eba5e0..449168f504c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/argmax.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include "argmax_imp.hpp" @@ -49,7 +48,7 @@ private: argmax_conf conf; }; -REG_FACTORY_FOR(ImplFactory, ArgMax); +REG_FACTORY_FOR(ArgMaxImpl, ArgMax); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp index 4fabb6010d1..b90851387d6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/argmax_imp.cpp @@ -4,6 +4,7 @@ #include "argmax_imp.hpp" +#include #include #include #include @@ -181,7 +182,7 @@ void argmax_many_classes_has_axis(const float* src_data, float* dst_data, Shape vmask_type vmask; int s_index = i0 * dim * after_num + ib1 * block_size; - memset(reinterpret_cast(&vmax_values[0]), 0, sizeof(vmax_values)); + std::memset(reinterpret_cast(&vmax_values[0]), 0, sizeof(vmax_values)); auto vswap_func = [&](int index1, int index2) { vtmp = vmax_values[index1]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp index 2f9014ad081..db0268846f8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/base.hpp @@ -6,7 +6,7 @@ #include #include "ie_util_internal.hpp" -#include "list.hpp" +#include "nodes/list.hpp" #include #include @@ -176,6 +176,21 @@ protected: IE_SUPPRESS_DEPRECATED_END +template +inline void extRegister(MKLDNNExtensions * extInstance, const char * __type) { + IE_SUPPRESS_DEPRECATED_START + extInstance->AddExt(__type, + [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { + return new __prim(layer); + }); + IE_SUPPRESS_DEPRECATED_END +} + +#define REG_FACTORY_FOR(__prim, __type) \ + void __prim ## __type(MKLDNNExtensions * extInstance) { \ + extRegister>(extInstance, #__type); \ + } + } // namespace Cpu } // namespace Extensions } // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp index 2d0d3fc2948..60e15726fc9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/batch_to_space.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -219,7 +218,7 @@ private: std::vector _crops_end; }; -REG_FACTORY_FOR(ImplFactory, BatchToSpace); +REG_FACTORY_FOR(BatchToSpaceImpl, BatchToSpace); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp index cdaedb24d64..2e784c4b85c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/broadcast.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -129,7 +128,7 @@ private: const size_t BROADCAST_SHAPE = 1; }; -REG_FACTORY_FOR(ImplFactory, Broadcast); +REG_FACTORY_FOR(BroadcastImpl, Broadcast); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp index 278cd53b68b..bae370b59f0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/bucketize.cpp 
@@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -123,7 +122,7 @@ private: bool with_bins = false; }; -REG_FACTORY_FOR(ImplFactory, Bucketize); +REG_FACTORY_FOR(BucketizeImpl, Bucketize); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h b/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h index 7f460dd9faa..bd55bb86294 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/uni_simd.h @@ -14,8 +14,16 @@ namespace Cpu { #if defined(HAVE_AVX512F) namespace AVX512F { + static inline __m512 _mm_uni_any_ps() { + return __m512{}; + } + + static inline __m512i _mm_uni_any_epi32() { + return __m512i{}; + } + static inline __m512 _mm_uni_loadu_ps(const float* psrc) { - return _mm512_loadu_ps(psrc); + return _mm512_mask_loadu_ps(_mm_uni_any_ps(), (__mmask16)-1, psrc); } static inline void _mm_uni_storeu_ps(float* pdst, const __m512& vec) { @@ -62,8 +70,12 @@ namespace AVX512F { return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1))); } + static inline __m512i _mm_uni_set1_epi32(int value) { + return _mm512_mask_set1_epi32(_mm_uni_any_epi32(), (__mmask16)-1, value); + } + static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) { - return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1); + return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm_uni_set1_epi32(0)), vec0, vec1); } static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) { @@ -90,10 +102,6 @@ namespace AVX512F { return _mm512_add_epi32(vec0, vec1); } - static inline __m512i _mm_uni_set1_epi32(int value) { - return _mm512_set1_epi32(value); - } - static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) { return _mm512_sll_epi32(vec, _mm_set1_epi64x(value)); } @@ -119,7 +127,7 @@ namespace AVX512F { } static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) { - return _mm512_cvtepi32_ps(vec); + return _mm512_mask_cvtepi32_ps(_mm_uni_any_ps(), (__mmask16)-1, vec); } } // namespace AVX512F #elif defined(HAVE_AVX2) diff --git a/inference-engine/src/mkldnn_plugin/nodes/convert.cpp b/inference-engine/src/mkldnn_plugin/nodes/convert.cpp index f171a142828..5e4bd96fb46 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/convert.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/convert.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -129,7 +128,7 @@ private: std::string precision; }; -REG_FACTORY_FOR(ImplFactory, Convert); +REG_FACTORY_FOR(ConvertImpl, Convert); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp index 372b237c7c4..717af9f9e3f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/ctc_greedy.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -86,7 +85,7 @@ public: } }; -REG_FACTORY_FOR(ImplFactory, CTCGreedyDecoder); +REG_FACTORY_FOR(CTCGreedyDecoderImpl, CTCGreedyDecoder); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp 
new file mode 100644 index 00000000000..03a4f2dbc6f --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/cum_sum.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "list.hpp" +#include "base.hpp" + +#include +#include +#include "ie_parallel.hpp" +#include "ie_precision.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class CumSumImpl: public ExtLayerBase { + enum { CUM_SUM_DATA, AXIS, numOfInputs }; + enum { N, C, D, H, W, numOfDims }; + bool exclusive; + bool reverse; + size_t axis = 0; + std::vector shape5d; + +public: + explicit CumSumImpl(const CNNLayer* layer) { + try { + layerName = layer->name; + if ((layer->insData.size() != numOfInputs && layer->insData.size() != (numOfInputs - 1)) || layer->outData.size() != 1) + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has incorrect number of input/output edges!"; + + const auto &dataTensor = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc(); + const auto &dataShape = dataTensor.getDims(); + if (dataShape.size() < 1 || dataShape.size() > 5) { + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'data' input tensor with rank: " << dataShape.size(); + } + + exclusive = layer->GetParamAsBool("exclusive", false); + reverse = layer->GetParamAsBool("reverse", false); + + const auto& dataPrecision = dataTensor.getPrecision(); + if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 && + dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16) + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'data' input precision: " << dataPrecision.name(); + + if (layer->insData.size() == numOfInputs) { + const auto& axisTensor = layer->insData[AXIS].lock()->getTensorDesc(); + const auto& axisTensorPrec = layer->insData[AXIS].lock()->getTensorDesc().getPrecision(); + if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64) + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has unsupported 'axis' input precision: " << axisTensorPrec.name(); + + const auto axisTensorRank = axisTensor.getDims().size(); + if (axisTensorRank != 0) + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input tensor with rank: " << axisTensorRank; + } + + if (dataShape != layer->outData[0]->getTensorDesc().getDims()) + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has different 'data' input and output dimensions"; + + shape5d = get5dShape(dataShape); + + LayerConfig config; + for (size_t i = 0; i < layer->insData.size(); i++) { + DataConfig inConfig; + inConfig.inPlace = -1; + inConfig.constant = false; + + Precision inPrecision = layer->insData[i].lock()->getTensorDesc().getPrecision(); + if (inPrecision == Precision::BF16) + inPrecision = Precision::FP32; + const SizeVector& inDims = layer->insData[i].lock()->getTensorDesc().getDims(); + inConfig.desc = TensorDesc(inPrecision, inDims, InferenceEngine::TensorDesc::getLayoutByDims(inDims)); + + config.inConfs.push_back(inConfig); + } + DataConfig outConfig; + outConfig.inPlace = -1; + outConfig.constant = false; + Precision outPrecision = layer->insData[CUM_SUM_DATA].lock()->getTensorDesc().getPrecision(); + if (outPrecision == Precision::BF16) + outPrecision = Precision::FP32; + 
const SizeVector& outDims = layer->outData[0]->getTensorDesc().getDims(); + outConfig.desc = TensorDesc(outPrecision, outDims, InferenceEngine::TensorDesc::getLayoutByDims(outDims)); + + config.outConfs.push_back(outConfig); + + config.dynBatchSupport = false; + confs.push_back(config); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override { + if (inputs.size() == numOfInputs) + axis = getAxis(inputs[AXIS], inputs[CUM_SUM_DATA]); + + const auto &dataPrecision = inputs[CUM_SUM_DATA]->getTensorDesc().getPrecision(); + switch (dataPrecision) { + case Precision::I8 : { execImpl<int8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; } + case Precision::U8 : { execImpl<uint8_t>(inputs[CUM_SUM_DATA], outputs[0]); break; } + case Precision::I16 : { execImpl<int16_t>(inputs[CUM_SUM_DATA], outputs[0]); break; } + case Precision::I32 : { execImpl<int32_t>(inputs[CUM_SUM_DATA], outputs[0]); break; } + case Precision::FP32 : { execImpl<float>(inputs[CUM_SUM_DATA], outputs[0]); break; } + case Precision::I64 : { execImpl<int64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; } + case Precision::U64 : { execImpl<uint64_t>(inputs[CUM_SUM_DATA], outputs[0]); break; } + default : { + if (resp) { + std::string errorMsg = "CumSum layer with name '" + layerName + "' has unsupported 'data' input precision: " + dataPrecision.name(); + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + } + return OK; + } + +private: + template <typename dataType> + void execImpl(const Blob::CPtr& _input, const Blob::Ptr& _output) { + const auto *input = _input->cbuffer().as<const dataType *>() + _input->getTensorDesc().getBlockingDesc().getOffsetPadding(); + auto *output = _output->buffer().as<dataType *>() + _output->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const size_t offset = _input->getTensorDesc().getBlockingDesc().getStrides()[axis]; + + if (reverse) { + if (exclusive) { + cumSum<true, true, dataType>(input, output, offset); + } else { + cumSum<true, false, dataType>(input, output, offset); + } + } else { + if (exclusive) { + cumSum<false, true, dataType>(input, output, offset); + } else { + cumSum<false, false, dataType>(input, output, offset); + } + } + } + + template <bool reverse, bool exclusive, typename dataType> + void cumSum(const dataType *input, dataType *output, const size_t &offset) { + std::vector<size_t> iterationRange(numOfDims - 1); + size_t j = 0; + for (size_t i = 0; i < shape5d.size(); i++) { + if (i == axis) + continue; + iterationRange[j++] = shape5d[i]; + } + parallel_for4d(iterationRange[0], iterationRange[1], iterationRange[2], iterationRange[3], [&](size_t ir0, size_t ir1, size_t ir2, size_t ir3) { + std::vector<size_t> forStartOffset; + forStartOffset.push_back(ir0); forStartOffset.push_back(ir1); forStartOffset.push_back(ir2); forStartOffset.push_back(ir3); + forStartOffset.insert(forStartOffset.begin() + axis, 0); + size_t startOffset = getStartOffset(forStartOffset); + + const dataType *inputStart = input + startOffset; + dataType *outputStart = output + startOffset; + + if (reverse) { + if (exclusive) { + outputStart[offset*(shape5d[axis] - 1)] = 0; + for (int64_t i = shape5d[axis] - 2; i >= 0; i--) { + outputStart[i*offset] = inputStart[(i+1)*offset] + outputStart[(i+1)*offset]; + } + } else { + outputStart[offset*(shape5d[axis] - 1)] = inputStart[offset*(shape5d[axis] - 1)]; + for (int64_t i = shape5d[axis] - 2; i >= 0; i--) { + outputStart[i*offset] = inputStart[i*offset] + outputStart[(i+1)*offset]; + } + } + } else { + if (exclusive) { + outputStart[0] = 0; + for (size_t i = 1; i < shape5d[axis]; i++) { + outputStart[i*offset] = inputStart[(i-1)*offset] +
outputStart[(i-1)*offset]; + } + } else { + outputStart[0] = inputStart[0]; + for (size_t i = 1; i < shape5d[axis]; i++) { + outputStart[i*offset] = inputStart[i*offset] + outputStart[(i-1)*offset]; + } + } + } + }); + } + + size_t getStartOffset(std::vector<size_t> &forStartOffset) { + return forStartOffset[N]*shape5d[C]*shape5d[D]*shape5d[H]*shape5d[W] + forStartOffset[C]*shape5d[D]*shape5d[H]*shape5d[W] + + forStartOffset[D]*shape5d[H]*shape5d[W] + forStartOffset[H]*shape5d[W] + forStartOffset[W]; + } + + size_t getAxis(const Blob::CPtr& _axis, const Blob::CPtr& _data) { + const auto& axisPrecision = _axis->getTensorDesc().getPrecision(); + const int64_t dataShapeSize = static_cast<int64_t>(_data->getTensorDesc().getDims().size()); + int64_t axisValueFromBlob; + switch (axisPrecision) { + case Precision::I32 : { + const auto *axisPtr = _axis->cbuffer().as<const int32_t *>(); + axisValueFromBlob = static_cast<int64_t>(axisPtr[0]); + break; + } + case Precision::I64 : { + const auto *axisPtr = _axis->cbuffer().as<const int64_t *>(); + axisValueFromBlob = axisPtr[0]; + break; + } + default : { + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' doesn't support 'axis' input with precision: " << axisPrecision.name(); + } + } + if (axisValueFromBlob < -dataShapeSize || axisValueFromBlob > dataShapeSize - 1) + THROW_IE_EXCEPTION << "CumSum layer with name '" << layerName << "' has axis with a value out of range: " << axisValueFromBlob; + return axisValueFromBlob >= 0 ? axisValueFromBlob : (axisValueFromBlob + dataShapeSize); + } + + std::vector<size_t> get5dShape(const SizeVector& dims) { + std::vector<size_t> shape5d(numOfDims, 1); + for (size_t i = 0; i < dims.size(); i++) + shape5d[i] = dims[i]; + return shape5d; + } + +private: + std::string layerName; +}; + +REG_FACTORY_FOR(CumSumImpl, CumSum); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp b/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp index f3b208b0994..69d9024029f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/depth_to_space.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -118,7 +117,7 @@ private: size_t ownStrides[CNTR_SIZE]; }; -REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace); +REG_FACTORY_FOR(DepthToSpaceImpl, DepthToSpace); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp index f80b48e29af..e5a7c09956a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -604,7 +603,7 @@ void DetectionOutputImpl::nms_mx(const float* conf_data, } } -REG_FACTORY_FOR(ImplFactory<DetectionOutputImpl>, DetectionOutput); +REG_FACTORY_FOR(DetectionOutputImpl, DetectionOutput); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp index ca8c8876343..c1f75770669 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/detectionoutput_onnx.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@
-392,7 +391,7 @@ private: -REG_FACTORY_FOR(ImplFactory, ExperimentalDetectronDetectionOutput); +REG_FACTORY_FOR(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp index c8d347244ed..e08897184a1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/fill.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/fill.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -121,7 +120,7 @@ private: const size_t FILL_VALUE = 1; }; -REG_FACTORY_FOR(ImplFactory, Fill); +REG_FACTORY_FOR(FillImpl, Fill); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp index e624bf03651..cd7e0378f07 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -148,7 +147,7 @@ private: }; -REG_FACTORY_FOR(ImplFactory, Gather); +REG_FACTORY_FOR(GatherImpl, Gather); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp index 5bb17c8a0bc..5e420b22ddd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/gather_tree.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -150,7 +149,7 @@ private: InferenceEngine::Precision precision; }; -REG_FACTORY_FOR(ImplFactory, GatherTree); +REG_FACTORY_FOR(GatherTreeImpl, GatherTree); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp index 46e647d7206..b5e4e214965 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/grn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/grn.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -58,7 +57,7 @@ private: float bias = 1.0f; }; -REG_FACTORY_FOR(ImplFactory, GRN); +REG_FACTORY_FOR(GRNImpl, GRN); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/interp.cpp b/inference-engine/src/mkldnn_plugin/nodes/interp.cpp index 02138570a1b..873575b8be4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/interp.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/interp.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include #include @@ -434,7 +433,7 @@ private: } }; -REG_FACTORY_FOR(ImplFactory, Interp); +REG_FACTORY_FOR(InterpImpl, Interp); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.cpp b/inference-engine/src/mkldnn_plugin/nodes/list.cpp new file mode 100644 index 00000000000..e017bae6c38 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/list.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "nodes/list.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +#define FACTORY_DECLARATION(__prim, __type) \ + void __prim ## __type(MKLDNNExtensions * extInstance) 
+ +#define FACTORY_CALL(__prim, __type) \ + __prim ## __type(this) + +#define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_DECLARATION(__prim, __type) +# include "list_tbl.hpp" +#undef MKLDNN_EXTENSION_NODE + +MKLDNNExtensions::MKLDNNExtensions() { + #define MKLDNN_EXTENSION_NODE(__prim, __type) FACTORY_CALL(__prim, __type) + # include "list_tbl.hpp" + #undef MKLDNN_EXTENSION_NODE +} + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.hpp b/inference-engine/src/mkldnn_plugin/nodes/list.hpp index 63b66b4ebd7..92ae6d80c69 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list.hpp @@ -12,17 +12,6 @@ #include #include -// WA for xbyak.h -#ifdef _WIN32 -# ifndef _WINSOCKAPI_ -# define _WINSOCKAPI_ -# endif -# ifndef _WINSOCK2API_ -# define _WINSOCK2API_ -# endif -#endif -#include - namespace InferenceEngine { namespace Extensions { namespace Cpu { @@ -37,14 +26,16 @@ struct ExtensionsHolder { class MKLDNNExtensions : public IExtension { public: + MKLDNNExtensions(); + StatusCode getPrimitiveTypes(char**& types, unsigned int& size, ResponseDesc* resp) noexcept override { - collectTypes(types, size, MKLDNNExtensions::GetExtensionsHolder()->list); + collectTypes(types, size, extensionsHolder->list); return OK; } StatusCode getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept override { - auto& factories = MKLDNNExtensions::GetExtensionsHolder()->list; + auto& factories = extensionsHolder->list; if (factories.find(cnnLayer->type) == factories.end()) { std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); @@ -78,21 +69,13 @@ public: delete this; } - static void AddExt(std::string name, ext_factory factory) { - auto extensionsHolder = GetExtensionsHolder(); - if (extensionsHolder != nullptr) - extensionsHolder->list[name] = factory; - } - - static std::shared_ptr<ExtensionsHolder> GetExtensionsHolder() { - static std::shared_ptr<ExtensionsHolder> localHolder; - if (localHolder == nullptr) { - localHolder = std::make_shared<ExtensionsHolder>(); - } - return localHolder; + void AddExt(std::string name, ext_factory factory) { + extensionsHolder->list[name] = factory; } private: + std::shared_ptr<ExtensionsHolder> extensionsHolder = std::make_shared<ExtensionsHolder>(); + template <class T> void collectTypes(char**& types, unsigned int& size, const std::map<std::string, T> &factories) { types = new char *[factories.size()]; @@ -108,22 +91,6 @@ private: IE_SUPPRESS_DEPRECATED_END -template <typename Ext> -class ExtRegisterBase { -public: - explicit ExtRegisterBase(const std::string& type) { - IE_SUPPRESS_DEPRECATED_START - MKLDNNExtensions::AddExt(type, - [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { - return new Ext(layer); - }); - IE_SUPPRESS_DEPRECATED_END - } -}; - -#define REG_FACTORY_FOR(__prim, __type) \ -static ExtRegisterBase<__prim> __reg__##__type(#__type) - } // namespace Cpu } // namespace Extensions } // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp new file mode 100644 index 00000000000..e0ebf3ff8cd --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp @@ -0,0 +1,93 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef MKLDNN_EXTENSION_NODE +# warning "MKLDNN_EXTENSION_NODE is not defined" +# define MKLDNN_EXTENSION_NODE(__prim, __type) +#endif +
+MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox); +MKLDNN_EXTENSION_NODE(MathImpl, Abs); +MKLDNN_EXTENSION_NODE(MathImpl, Acos); +MKLDNN_EXTENSION_NODE(MathImpl, Acosh); +MKLDNN_EXTENSION_NODE(MathImpl, Asin); +MKLDNN_EXTENSION_NODE(MathImpl, Asinh); +MKLDNN_EXTENSION_NODE(MathImpl, Atan); +MKLDNN_EXTENSION_NODE(MathImpl, Atanh); +MKLDNN_EXTENSION_NODE(MathImpl, Ceil); +MKLDNN_EXTENSION_NODE(MathImpl, Cos); +MKLDNN_EXTENSION_NODE(MathImpl, Cosh); +MKLDNN_EXTENSION_NODE(MathImpl, Erf); +MKLDNN_EXTENSION_NODE(MathImpl, Floor); +MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid); +MKLDNN_EXTENSION_NODE(MathImpl, Log); +MKLDNN_EXTENSION_NODE(MathImpl, Neg); +MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal); +MKLDNN_EXTENSION_NODE(MathImpl, Selu); +MKLDNN_EXTENSION_NODE(MathImpl, Sign); +MKLDNN_EXTENSION_NODE(MathImpl, Sin); +MKLDNN_EXTENSION_NODE(MathImpl, Sinh); +MKLDNN_EXTENSION_NODE(MathImpl, Softplus); +MKLDNN_EXTENSION_NODE(MathImpl, Softsign); +MKLDNN_EXTENSION_NODE(MathImpl, Tan); +MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs); +MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence); +MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput); +MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax); +MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze); +MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice); +MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput); +MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo); +MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax); +MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo); +MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze); +MKLDNN_EXTENSION_NODE(ConvertImpl, Convert); +MKLDNN_EXTENSION_NODE(FillImpl, Fill); +MKLDNN_EXTENSION_NODE(UniqueImpl, Unique); +MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling); +MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace); +MKLDNN_EXTENSION_NODE(ScatterImpl, ScatterUpdate); +MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot); +MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast); +MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum); +MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense); +MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor); +MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage); +MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression); +MKLDNN_EXTENSION_NODE(TopKImpl, TopK); +MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels); +MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth); +MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile); +MKLDNN_EXTENSION_NODE(InterpImpl, Interp); +MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace); +MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator); +MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS); +MKLDNN_EXTENSION_NODE(PadImpl, Pad); +MKLDNN_EXTENSION_NODE(GRNImpl, GRN); +MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows); +MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize); +MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder); +MKLDNN_EXTENSION_NODE(GatherImpl, Gather); +MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal); +MKLDNN_EXTENSION_NODE(RangeImpl, Range); +MKLDNN_EXTENSION_NODE(SelectImpl, Select); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceAnd); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL1); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceL2); +MKLDNN_EXTENSION_NODE(ReduceImpl, 
ReduceLogSum); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceLogSumExp); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMax); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMean); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceMin); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceOr); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceProd); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSum); +MKLDNN_EXTENSION_NODE(ReduceImpl, ReduceSumSquare); +MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree); +MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered); +MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch); +MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean); +MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN); +MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum); +MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum); diff --git a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp index cea0169c56f..b79109d946b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -105,7 +104,7 @@ private: bool is_last_dim = false; }; -REG_FACTORY_FOR(ImplFactory, LogSoftmax); +REG_FACTORY_FOR(LogSoftmaxImpl, LogSoftmax); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/math.cpp b/inference-engine/src/mkldnn_plugin/nodes/math.cpp index 92f2059618e..2920badc7f2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/math.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/math.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -272,29 +271,29 @@ private: float gamma = 0.0f; }; -REG_FACTORY_FOR(ImplFactory, Abs); -REG_FACTORY_FOR(ImplFactory, Acos); -REG_FACTORY_FOR(ImplFactory, Acosh); -REG_FACTORY_FOR(ImplFactory, Asin); -REG_FACTORY_FOR(ImplFactory, Asinh); -REG_FACTORY_FOR(ImplFactory, Atan); -REG_FACTORY_FOR(ImplFactory, Atanh); -REG_FACTORY_FOR(ImplFactory, Ceil); -REG_FACTORY_FOR(ImplFactory, Cos); -REG_FACTORY_FOR(ImplFactory, Cosh); -REG_FACTORY_FOR(ImplFactory, Erf); -REG_FACTORY_FOR(ImplFactory, Floor); -REG_FACTORY_FOR(ImplFactory, HardSigmoid); -REG_FACTORY_FOR(ImplFactory, Log); -REG_FACTORY_FOR(ImplFactory, Neg); -REG_FACTORY_FOR(ImplFactory, Reciprocal); -REG_FACTORY_FOR(ImplFactory, Selu); -REG_FACTORY_FOR(ImplFactory, Sign); -REG_FACTORY_FOR(ImplFactory, Sin); -REG_FACTORY_FOR(ImplFactory, Sinh); -REG_FACTORY_FOR(ImplFactory, Softplus); -REG_FACTORY_FOR(ImplFactory, Softsign); -REG_FACTORY_FOR(ImplFactory, Tan); +REG_FACTORY_FOR(MathImpl, Abs); +REG_FACTORY_FOR(MathImpl, Acos); +REG_FACTORY_FOR(MathImpl, Acosh); +REG_FACTORY_FOR(MathImpl, Asin); +REG_FACTORY_FOR(MathImpl, Asinh); +REG_FACTORY_FOR(MathImpl, Atan); +REG_FACTORY_FOR(MathImpl, Atanh); +REG_FACTORY_FOR(MathImpl, Ceil); +REG_FACTORY_FOR(MathImpl, Cos); +REG_FACTORY_FOR(MathImpl, Cosh); +REG_FACTORY_FOR(MathImpl, Erf); +REG_FACTORY_FOR(MathImpl, Floor); +REG_FACTORY_FOR(MathImpl, HardSigmoid); +REG_FACTORY_FOR(MathImpl, Log); +REG_FACTORY_FOR(MathImpl, Neg); +REG_FACTORY_FOR(MathImpl, Reciprocal); +REG_FACTORY_FOR(MathImpl, Selu); +REG_FACTORY_FOR(MathImpl, Sign); +REG_FACTORY_FOR(MathImpl, Sin); +REG_FACTORY_FOR(MathImpl, Sinh); +REG_FACTORY_FOR(MathImpl, Softplus); +REG_FACTORY_FOR(MathImpl, Softsign); +REG_FACTORY_FOR(MathImpl, Tan); } // namespace Cpu } // namespace 
Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp index 66303d74405..28b25e25751 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp @@ -108,7 +108,7 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() { MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::primitive_desc &primitive_desc) const { memory::primitive_desc aprimitive_desc; - mkldnn_primitive_desc_t bndesc; + mkldnn_primitive_desc_t bndesc = nullptr; mkldnn_batch_normalization_desc_t *p; error::wrap_c_api(mkldnn_primitive_desc_query( primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p), @@ -128,7 +128,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetVarianceDesc(const memory::pri MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primitive_desc &primitive_desc) const { memory::primitive_desc aprimitive_desc; - mkldnn_primitive_desc_t bndesc; + mkldnn_primitive_desc_t bndesc = nullptr; mkldnn_batch_normalization_desc_t *p; error::wrap_c_api(mkldnn_primitive_desc_query( primitive_desc.get(), mkldnn::convert_to_c(batch_normalization_d), 0, &p), @@ -148,7 +148,7 @@ MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetMeanDesc(const memory::primiti MKLDNNMemoryDesc MKLDNNBatchNormalizationNode::GetScaleShiftWeightsDesc(const memory::primitive_desc &primitive_desc) const { memory::primitive_desc adesc; - mkldnn_primitive_desc_t bndesc; + mkldnn_primitive_desc_t bndesc = nullptr; const_mkldnn_primitive_desc_t const_bndesc = mkldnn_primitive_desc_query_pd(primitive_desc.get(), mkldnn::convert_to_c(weights_pd), 0); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 9213a746438..e5f3b84f71e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -17,6 +17,16 @@ #include #include #include + +// WA for xbyak.h +#ifdef _WIN32 +# ifndef _WINSOCKAPI_ +# define _WINSOCKAPI_ +# endif +# ifndef _WINSOCK2API_ +# define _WINSOCK2API_ +# endif +#endif #include "cpu_isa_traits.hpp" using namespace mkldnn; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index c9e93ccc86e..e30e8842134 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -279,8 +279,10 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::ndhwc : memory::nhwc); createDescriptor({in_candidate}, {out_candidate}); } else { - inputDataType = convLayer->input()->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32; - outputDataType = convLayer->outData[0]->getPrecision() == Precision::BF16 ? memory::bf16 : memory::f32; + inputDataType = (convLayer->input()->getPrecision() == Precision::BF16 + && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::bf16 : memory::f32; + outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16 + && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? 
memory::bf16 : memory::f32; eltwisePrecision = Precision::FP32; for (int i = 0; i < fusedWith.size(); i++) { auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 8591bebfbfb..a7c3fdc3046 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -37,7 +37,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { precision = InferenceEngine::Precision::FP32; auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - if (getParentEdges().size() != 1) + if (getParentEdges().empty() || getParentEdges().size() > 3) THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName(); @@ -45,7 +45,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { auto * deconvLayer = dynamic_cast(getCnnLayer().get()); if (deconvLayer == nullptr) THROW_IE_EXCEPTION << "Cannot convert deconvolution layer."; - if (deconvLayer->_weights == nullptr) { + if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) { THROW_IE_EXCEPTION << "Weights are empty for layer: " << deconvLayer->name << " used in MKLDNN node: " << getName() << "\n" << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" @@ -54,11 +54,22 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { withGroups = (deconvLayer->_group > 1); isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth && deconvLayer->_group == deconvLayer->input()->getDims()[1]; - withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0); + + bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3; if (withBiases) { - biases = deconvLayer->_biases; + Blob::Ptr biases; + + if (getParentEdges().size() == 3) { + auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer(); + if (biasLayer->type != "Const") + THROW_IE_EXCEPTION << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases"; + biases = biasLayer->blobs["custom"]; + } else { + biases = deconvLayer->_biases; + } + // WA: we add bias as depthwise post op - setBiasAsPostOp(); + setBiasAsPostOp(biases); } /* Original layout format for deconv weights is iohw (from Caffe). 
@@ -83,7 +94,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]); } - internalBlobs.push_back(createInternalBlob(weightDims, true)); + if (getParentEdges().size() == 1) + internalBlobs.push_back(createInternalBlob(weightDims, true)); invertVectorCopyUtoI(deconvLayer->_stride, stride); for (int i = 1; i <= deconvLayer->_dilation.size(); i++) { @@ -113,7 +125,7 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { } } -void MKLDNNDeconvolutionNode::setBiasAsPostOp() { +void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) { mkldnn::post_ops ops; MKLDNNDims depthwiseDims({static_cast(rnd_up(biases->size(), 16))}); @@ -157,7 +169,7 @@ void MKLDNNDeconvolutionNode::createPrimitive() { prim.reset(new convolution_backward_data(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(), - internalBlobMemory[0]->GetPrimitive(), + getWeights(), getChildEdgeAt(0)->getMemory().GetPrimitive())); } @@ -197,15 +209,32 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(idx - 1).desc()) + : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_primitive_desc(idx).desc()); + + if (desc.getLayout() == InferenceEngine::Layout::ANY) { return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), getParentEdgeAt(idx)->getDims().ToSizeVector(), desc.getLayout())); - else - return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), - getParentEdgeAt(idx)->getDims().ToSizeVector(), - desc.getBlockingDesc())); + } else { + if (getParentEdgeAt(idx)->getDims().ToSizeVector().size() != *std::max_element(desc.getBlockingDesc().getOrder().begin(), + desc.getBlockingDesc().getOrder().end()) + 1) { + auto old_dims = getParentEdgeAt(idx)->getDims().ToSizeVector(); + auto new_dims = weightsDims.ToSizeVector(); + + auto td = InferenceEngine::TensorDesc(desc.getPrecision(), + new_dims, + desc.getBlockingDesc()); + if (new_dims.size() == desc.getBlockingDesc().getBlockDims().size()) { + td.setLayout(BLOCKED); + } + return MKLDNNMemoryDesc(td); + } else { + return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), + getParentEdgeAt(idx)->getDims().ToSizeVector(), + desc.getBlockingDesc())); + } + } } MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { @@ -219,4 +248,9 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_i getChildEdgeAt(idx)->getDims().ToSizeVector(), desc.getBlockingDesc())); } + +const mkldnn::memory& MKLDNNDeconvolutionNode::getWeights() const { + return getParentEdges().size() > 1 ? 
getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive(); +} + REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index e4a5ab26e8a..c2493f45d5b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -27,11 +27,14 @@ public: return false; } + size_t descInputNumbers(MKLDNNDescriptor desc) override { + return static_cast(getParentEdges().size()); + } + MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; private: - bool withBiases = false; bool withGroups = false; bool isDW = false; size_t groupNum = 1; @@ -40,13 +43,14 @@ private: std::vector dilation; std::vector paddingR; MKLDNNDims weightsDims; - InferenceEngine::Blob::Ptr biases; std::vector> descs_fwd; std::vector> descs_bwd; mkldnn::primitive_attr attr; std::vector PostOpsIntBlobMemory; - void setBiasAsPostOp(); + void setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases); + + const mkldnn::memory& getWeights() const; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 31065c2d498..a004acdd63c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -715,7 +715,9 @@ void MKLDNNNormalizeNode::initSupportedPrimitiveDescriptors() { setPostOps(attr, true); Precision inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); + inputPrecision = inputPrecision == Precision::BF16 ? Precision(Precision::FP32) : inputPrecision; Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + outputPrecision = outputPrecision == Precision::BF16 ? Precision(Precision::FP32) : outputPrecision; if (!fusedWith.empty()) { auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index ac81e16413c..c7b7ef25617 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -119,22 +119,38 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // MKLDNN doesn't support direct reorders from planar data formats to grouped weights formats. // Code block below tries to detect such cases and reinterpret data planar formats (e.g. nchw) // as grouped weights planar formats (e.g. goihw) since they have same physical memory layout. - if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() && MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) { + if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() && + MKLDNNMemory::IsGroupedFormat(dst_blocked->GetFormat())) { try { mkldnn::memory::dims newDims = dst_blocked->GetDims(); - mkldnn::memory::format newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw : - src_blocked->GetDims().size() == 5 ? 
memory::goidhw : - src_blocked->GetFormat(); + mkldnn::memory::format newFormat; + newFormat = src_blocked->GetDims().size() == 4 ? memory::goihw : + src_blocked->GetDims().size() == 5 ? memory::goidhw : + src_blocked->GetFormat(); auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat); src_blocked->Create(newDesc, srcPtr, false); createReorder(); - } catch (const std::exception&) { + } catch (...) { THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case"; } + // MKLDNN doesn't support direct reorders between planar data formats in case they have different rank but the same number of elements. + // Code block below detects these cases and substitute src dims with dst ones. + } else if (MKLDNNMemory::GetPlainFormat(src_blocked->GetDims()) == src_blocked->GetFormat() && + MKLDNNMemory::GetPlainFormat(dst_blocked->GetDims()) == dst_blocked->GetFormat() && + src_blocked->GetElementsCount() == dst_blocked->GetElementsCount()) { + try { + auto newDesc = mkldnn::memory::desc(dst_blocked->GetDims(), src_blocked->GetDataType(), dst_blocked->GetFormat()); + src_blocked->Create(newDesc, srcPtr, false); + + createReorder(); + } catch (...) { + THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case"; + } + } else { + THROW_IE_EXCEPTION << "Cannot create reorder primitive: unsupported reorder case"; } - // TODO: should't we throw exception in this case? } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp b/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp index 028b354e976..407eb51228f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/non_max_suppression.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -242,7 +241,7 @@ private: bool sort_result_descending = true; }; -REG_FACTORY_FOR(ImplFactory, NonMaxSuppression); +REG_FACTORY_FOR(NonMaxSuppressionImpl, NonMaxSuppression); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp b/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp index 734aeb9bdf3..56d43ea7872 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/one_hot.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include "ie_parallel.hpp" @@ -103,7 +102,7 @@ private: Precision input_precision; }; -REG_FACTORY_FOR(ImplFactory, OneHot); +REG_FACTORY_FOR(OneHotImpl, OneHot); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/pad.cpp b/inference-engine/src/mkldnn_plugin/nodes/pad.cpp index 89fa86d5085..bd02cee4ce6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/pad.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/pad.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -251,7 +250,7 @@ void PadImpl::pad_symmetric(const float *src_data, float* dst_data) { }); } -REG_FACTORY_FOR(ImplFactory, Pad); +REG_FACTORY_FOR(PadImpl, Pad); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp b/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp index 2068c141070..c0a0cbf9ab0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/powerfile.cpp @@ 
-2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -57,7 +56,7 @@ private: std::vector shift_; }; -REG_FACTORY_FOR(ImplFactory, PowerFile); +REG_FACTORY_FOR(PowerFileImpl, PowerFile); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp b/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp index 737bc5fdb15..d372c760746 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/priorbox.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -338,7 +337,7 @@ private: int _num_priors = 0; }; -REG_FACTORY_FOR(ImplFactory, PriorBox); +REG_FACTORY_FOR(PriorBoxImpl, PriorBox); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp b/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp index 50cc57a2fdd..954f7d6fed6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/priorbox_clustered.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include #include @@ -117,7 +116,7 @@ private: float offset_; }; -REG_FACTORY_FOR(ImplFactory, PriorBoxClustered); +REG_FACTORY_FOR(PriorBoxClusteredImpl, PriorBoxClustered); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp index e1df4242805..c783797ea26 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/priorgridgenerator_onnx.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include #include @@ -90,7 +89,7 @@ private: }; -REG_FACTORY_FOR(ImplFactory, ExperimentalDetectronPriorGridGenerator); +REG_FACTORY_FOR(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp b/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp index 62dafbaa49a..4c794fe73ad 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/proposal.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include "proposal_imp.hpp" @@ -179,7 +178,7 @@ private: bool store_prob; // store blob with proposal probabilities }; -REG_FACTORY_FOR(ImplFactory, Proposal); +REG_FACTORY_FOR(ProposalImpl, Proposal); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/proposal_imp.cpp b/inference-engine/src/mkldnn_plugin/nodes/proposal_imp.cpp index 562755e2cf0..61d7db2c353 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/proposal_imp.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/proposal_imp.cpp @@ -4,6 +4,7 @@ #include "proposal_imp.hpp" +#include #include #include #include @@ -137,7 +138,7 @@ static void nms_cpu(const int num_boxes, int is_dead[], const float* x1 = boxes + 2 * num_proposals; const float* y1 = boxes + 3 * num_proposals; - memset(is_dead, 0, num_boxes * sizeof(int)); + std::memset(is_dead, 0, num_boxes * sizeof(int)); #if defined(HAVE_AVX2) __m256 vc_fone = _mm256_set1_ps(coordinates_offset); 
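The registration rework that runs through the node files above and below follows one pattern: the old REG_FACTORY_FOR expanded into a static ExtRegisterBase object whose constructor pushed a factory into a global singleton, while the new macro (see the base.hpp, list.cpp and list_tbl.hpp hunks earlier in this patch) emits a plain registration function that the MKLDNNExtensions constructor calls once per MKLDNN_EXTENSION_NODE entry. The sketch below is not part of the patch; Extensions, Factory, factory_creator and the Interp example are simplified stand-ins for MKLDNNExtensions, ImplFactory and the real node classes, kept only to show how the macro chain expands into per-instance registration.

// Stand-alone sketch of the new registration pattern (simplified types, not the patch itself).
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Factory { std::string node; };                      // stands in for ILayerImplFactory
using factory_creator = std::function<std::shared_ptr<Factory>()>;

class Extensions {                                         // stands in for MKLDNNExtensions
public:
    Extensions();                                          // body generated from the node table below
    void AddExt(const std::string& type, factory_creator creator) {
        list[type] = std::move(creator);                   // per-instance state, no global singleton
    }
    std::map<std::string, factory_creator> list;
};

// New-style REG_FACTORY_FOR: defines a named registration function instead of a static registrar.
// (The real macro registers ImplFactory<__prim>; this sketch drops that indirection.)
#define REG_FACTORY_FOR(__prim, __type) \
    void __prim##__type(Extensions* ext) { \
        ext->AddExt(#__type, [] { return std::make_shared<Factory>(Factory{#__type}); }); \
    }

struct InterpImpl {};                  // what a node file such as interp.cpp would provide
REG_FACTORY_FOR(InterpImpl, Interp);   // defines void InterpImplInterp(Extensions*)

// list.cpp includes list_tbl.hpp twice: once to declare these functions, once to call them.
#define MKLDNN_EXTENSION_NODE(__prim, __type) __prim##__type(this)
Extensions::Extensions() {
    MKLDNN_EXTENSION_NODE(InterpImpl, Interp);             // one call per table entry
}
#undef MKLDNN_EXTENSION_NODE

int main() {
    Extensions ext;                                        // registration happens in the constructor
    std::cout << "registered: " << ext.list.count("Interp") << "\n";
}

In the actual hunks the declaration pass uses FACTORY_DECLARATION and the constructor pass uses FACTORY_CALL, so the full node list lives in a single header (list_tbl.hpp) and each extension instance owns its own factory map instead of sharing a process-wide one.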
diff --git a/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp index 526c7775c36..e6370b16a5a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/proposal_onnx.cpp @@ -2,9 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" +#include #include #include #include @@ -130,7 +130,7 @@ void nms_cpu(const int num_boxes, int is_dead[], const float* x1 = boxes + 2 * num_proposals; const float* y1 = boxes + 3 * num_proposals; - memset(is_dead, 0, num_boxes * sizeof(int)); + std::memset(is_dead, 0, num_boxes * sizeof(int)); #if defined(HAVE_AVX2) __m256 vc_fone = _mm256_set1_ps(coordinates_offset); @@ -410,7 +410,7 @@ private: std::vector roi_indices_; }; -REG_FACTORY_FOR(ImplFactory, ExperimentalDetectronGenerateProposalsSingleImage); +REG_FACTORY_FOR(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp b/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp index 372c7de03df..f42061338b5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/psroi.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include #include @@ -303,7 +302,7 @@ private: float trans_std_; }; -REG_FACTORY_FOR(ImplFactory, PSROIPooling); +REG_FACTORY_FOR(PSROIPoolingImpl, PSROIPooling); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/range.cpp b/inference-engine/src/mkldnn_plugin/nodes/range.cpp index 3416cc798e5..3f6c2ecfb41 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/range.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/range.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -125,7 +124,7 @@ StatusCode RangeImpl::range(data_t start, data_t limit, data_t delta, Blob::Ptr }); return OK; } -REG_FACTORY_FOR(ImplFactory, Range); +REG_FACTORY_FOR(RangeImpl, Range); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/reduce.cpp b/inference-engine/src/mkldnn_plugin/nodes/reduce.cpp index 837313d81a2..f59178f5034 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/reduce.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/reduce.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -388,18 +387,18 @@ void ReduceImpl::reduce( } } -REG_FACTORY_FOR(ImplFactory, ReduceAnd); -REG_FACTORY_FOR(ImplFactory, ReduceL1); -REG_FACTORY_FOR(ImplFactory, ReduceL2); -REG_FACTORY_FOR(ImplFactory, ReduceLogSum); -REG_FACTORY_FOR(ImplFactory, ReduceLogSumExp); -REG_FACTORY_FOR(ImplFactory, ReduceMax); -REG_FACTORY_FOR(ImplFactory, ReduceMean); -REG_FACTORY_FOR(ImplFactory, ReduceMin); -REG_FACTORY_FOR(ImplFactory, ReduceOr); -REG_FACTORY_FOR(ImplFactory, ReduceProd); -REG_FACTORY_FOR(ImplFactory, ReduceSum); -REG_FACTORY_FOR(ImplFactory, ReduceSumSquare); +REG_FACTORY_FOR(ReduceImpl, ReduceAnd); +REG_FACTORY_FOR(ReduceImpl, ReduceL1); +REG_FACTORY_FOR(ReduceImpl, ReduceL2); +REG_FACTORY_FOR(ReduceImpl, ReduceLogSum); +REG_FACTORY_FOR(ReduceImpl, ReduceLogSumExp); +REG_FACTORY_FOR(ReduceImpl, ReduceMax); +REG_FACTORY_FOR(ReduceImpl, ReduceMean); +REG_FACTORY_FOR(ReduceImpl, ReduceMin); +REG_FACTORY_FOR(ReduceImpl, 
ReduceOr); +REG_FACTORY_FOR(ReduceImpl, ReduceProd); +REG_FACTORY_FOR(ReduceImpl, ReduceSum); +REG_FACTORY_FOR(ReduceImpl, ReduceSumSquare); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp b/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp index beb3c77cdb2..9bf522a4c60 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/region_yolo.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include "common/defs.h" #include "common/softmax.h" @@ -304,7 +303,7 @@ private: } }; -REG_FACTORY_FOR(ImplFactory, RegionYolo); +REG_FACTORY_FOR(RegionYoloImpl, RegionYolo); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp b/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp index 331d2b26e2a..0b74fbd4395 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/reorg_yolo.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -64,7 +63,7 @@ private: int stride; }; -REG_FACTORY_FOR(ImplFactory, ReorgYolo); +REG_FACTORY_FOR(ReorgYoloImpl, ReorgYolo); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp b/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp index 60037ccad17..52499d1d973 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/reverse_sequence.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -172,7 +171,7 @@ private: size_t work_amount_dst; }; -REG_FACTORY_FOR(ImplFactory, ReverseSequence); +REG_FACTORY_FOR(ReverseSequenceImpl, ReverseSequence); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp index 07aad16a9ee..29539ef049c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/roifeatureextractor_onnx.cpp @@ -7,7 +7,6 @@ // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp // -#include "list.hpp" #include "base.hpp" #include #include @@ -406,7 +405,7 @@ private: int nw = 0; }; -REG_FACTORY_FOR(ImplFactory, ExperimentalDetectronROIFeatureExtractor); +REG_FACTORY_FOR(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/scatter.cpp b/inference-engine/src/mkldnn_plugin/nodes/scatter.cpp index 67348d1e706..1a4a3edb928 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/scatter.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/scatter.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -182,7 +181,7 @@ private: const size_t SCATTER_UPDATES = 2; }; -REG_FACTORY_FOR(ImplFactory, ScatterUpdate); +REG_FACTORY_FOR(ScatterImpl, ScatterUpdate); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/select.cpp b/inference-engine/src/mkldnn_plugin/nodes/select.cpp index 35c606b5102..3813986f4e1 100644 --- 
a/inference-engine/src/mkldnn_plugin/nodes/select.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/select.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -221,7 +220,7 @@ private: } }; -REG_FACTORY_FOR(ImplFactory, Select); +REG_FACTORY_FOR(SelectImpl, Select); } // namespace Cpu } // namespace Extensions } // namespace InferenceEngine diff --git a/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp b/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp index b05ac7abb0e..8a818edec64 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/shuffle_channels.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -142,7 +141,7 @@ private: size_t ownStrides[CNTR_SIZE]; }; -REG_FACTORY_FOR(ImplFactory, ShuffleChannels); +REG_FACTORY_FOR(ShuffleChannelsImpl, ShuffleChannels); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp b/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp index 9f00833444e..2bc2f8c506a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/simplernms.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -332,7 +331,7 @@ private: std::vector anchors_; }; -REG_FACTORY_FOR(ImplFactory, SimplerNMS); +REG_FACTORY_FOR(SimplerNMSImpl, SimplerNMS); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp b/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp index 11a90855106..d84da1ac400 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/space_to_batch.cpp @@ -4,7 +4,6 @@ #include "base.hpp" #include "ie_parallel.hpp" -#include "list.hpp" #include #include @@ -222,7 +221,7 @@ private: std::vector _pads_end; }; -REG_FACTORY_FOR(ImplFactory, SpaceToBatch); +REG_FACTORY_FOR(SpaceToBatchImpl, SpaceToBatch); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/space_to_depth.cpp b/inference-engine/src/mkldnn_plugin/nodes/space_to_depth.cpp index 39c294ae4ad..911dc69370c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/space_to_depth.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/space_to_depth.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -119,7 +118,7 @@ private: size_t ownStrides[CNTR_SIZE]; }; -REG_FACTORY_FOR(ImplFactory, SpaceToDepth); +REG_FACTORY_FOR(SpaceToDepthImpl, SpaceToDepth); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp index 195702dcf21..2ecf370db9d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_fill_empty_rows.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -225,7 +224,7 @@ private: size_t outMaxNumValues = 0; }; -REG_FACTORY_FOR(ImplFactory, SparseFillEmptyRows); +REG_FACTORY_FOR(SparseFillEmptyRowsImpl, SparseFillEmptyRows); } // namespace Cpu } // namespace Extensions diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp index 07f313a1c49..9aeaf096364 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_segment_reduce.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -202,9 +201,9 @@ private: ReducedOp reduction_op; }; -REG_FACTORY_FOR(ImplFactory, SparseSegmentMean); -REG_FACTORY_FOR(ImplFactory, SparseSegmentSqrtN); -REG_FACTORY_FOR(ImplFactory, SparseSegmentSum); +REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentMean); +REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSqrtN); +REG_FACTORY_FOR(SparseSegmentReduceImpl, SparseSegmentSum); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp index 4c046e2386b..2e8af4a85e5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_to_dense.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -152,7 +151,7 @@ private: bool with_default_value = false; }; -REG_FACTORY_FOR(ImplFactory, SparseToDense); +REG_FACTORY_FOR(SparseToDenseImpl, SparseToDense); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp b/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp index d68040f7fe9..1a6726b2d94 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/sparse_weighted_reduce.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -226,7 +225,7 @@ private: Precision input_default_value_precision; }; -REG_FACTORY_FOR(ImplFactory, ExperimentalSparseWeightedSum); +REG_FACTORY_FOR(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp b/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp index 8bd80f81c5d..1d5a916304a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/squeeze.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -57,7 +56,7 @@ public: } }; -REG_FACTORY_FOR(ImplFactory, Squeeze); +REG_FACTORY_FOR(SqueezeImpl, Squeeze); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/strided_slice.cpp b/inference-engine/src/mkldnn_plugin/nodes/strided_slice.cpp index 0afdff0af96..0ab3202158e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/strided_slice.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/strided_slice.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -376,7 +375,7 @@ void StridedSliceImpl::strided_slice_p(const float *src_data, float* dst_data) { }); } -REG_FACTORY_FOR(ImplFactory, StridedSlice); +REG_FACTORY_FOR(StridedSliceImpl, StridedSlice); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/topk.cpp b/inference-engine/src/mkldnn_plugin/nodes/topk.cpp index 04aa8be79d1..f2f715ec687 100644 
--- a/inference-engine/src/mkldnn_plugin/nodes/topk.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/topk.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -578,7 +577,7 @@ private: } }; -REG_FACTORY_FOR(ImplFactory, TopK); +REG_FACTORY_FOR(TopKImpl, TopK); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp b/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp index c6dad29037e..5410ad6baee 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/topkrois_onnx.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include #include @@ -71,7 +70,7 @@ private: int max_rois_num_; }; -REG_FACTORY_FOR(ImplFactory, ExperimentalDetectronTopKROIs); +REG_FACTORY_FOR(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/unique.cpp b/inference-engine/src/mkldnn_plugin/nodes/unique.cpp index e1094107339..3598479db34 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/unique.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/unique.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -199,7 +198,7 @@ private: size_t num_elements = 0; }; -REG_FACTORY_FOR(ImplFactory, Unique); +REG_FACTORY_FOR(UniqueImpl, Unique); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp b/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp index f65c06e450c..f3f67a4a20b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/unsqueeze.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "list.hpp" #include "base.hpp" #include @@ -52,7 +51,7 @@ public: } }; -REG_FACTORY_FOR(ImplFactory, Unsqueeze); +REG_FACTORY_FOR(UnsqueezeImpl, Unsqueeze); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/conv_bias_fusion.hpp b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/conv_bias_fusion.hpp index 17f9f03b289..79e1689b09c 100644 --- a/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/conv_bias_fusion.hpp +++ b/inference-engine/src/transformations/include/transformations/convert_opset1_to_legacy/conv_bias_fusion.hpp @@ -38,9 +38,9 @@ class INFERENCE_ENGINE_API_CLASS(ConvFusion); class ngraph::pass::ConvFusion: public ngraph::pass::GraphRewrite { public: ConvFusion() : GraphRewrite() { - fuse_convolution_with(); - fuse_convolution_with(); - fuse_convolution_with(); + fuse_convolution_with(); + fuse_convolution_with(); + fuse_convolution_with(); } private: @@ -53,8 +53,8 @@ private: template void ngraph::pass::ConvFusion::fuse_convolution_with() { - static_assert(std::is_same() || std::is_same(), - "This transformation works only with ngraph::op::v1::Add and ngraph::op::v1::Multiply"); + static_assert(std::is_same() || std::is_same(), + "This transformation works only with ngraph::opset1::Add and ngraph::opset1::Multiply"); static_assert(std::is_same() || std::is_same(), "This transformation works only with ngraph::op::ConvolutionIE and ngraph::op::DeconvolutionIE"); @@ -85,56 +85,63 @@ ngraph::graph_rewrite_callback 
ngraph::pass::ConvFusion::get_callback() { } // TODO: check that constant can be scalar and do not match [1, C, 1, 1] layout - auto constant_shape = m_const->get_shape(); - auto output_shape = m_conv->get_shape(); - size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies()); - if (constant_size != output_shape[1]) { + const auto constant_shape = m_const->get_shape(); + const auto output_pshape = m_conv->get_output_partial_shape(0); + + if (output_pshape.rank().is_dynamic() || output_pshape[1].is_dynamic()) { return false; } - std::shared_ptr constant(m_const); + const auto channel_dim = output_pshape[1].get_length(); + + size_t constant_size = std::accumulate(constant_shape.begin(), constant_shape.end(), 1, std::multiplies()); + if (constant_size != channel_dim) { + return false; + } + + Output constant(m_const); if (constant_shape.size() > 1) { - constant = std::make_shared(constant, op::Constant::create(element::i64, Shape{1}, {output_shape[1]}), true); + constant = std::make_shared(constant, op::Constant::create(element::i64, Shape{1}, {channel_dim}), true); } if (m_conv->output(0).get_target_inputs().size() != 1) { return false; } - std::shared_ptr new_conv, new_weights, new_bias; - if (std::dynamic_pointer_cast(eltwise)) { + Output new_conv, new_weights, new_bias; + if (std::dynamic_pointer_cast(eltwise)) { // Fuse: ConvolutionIE/DeconvolutionIE->Add if (m_conv->inputs().size() == 2) { new_bias = constant; } else { - new_bias = std::make_shared(constant, m_conv->input_value(2)); + new_bias = std::make_shared(constant, m_conv->input_value(2)); } new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), m_conv->input_value(1), new_bias}); - } else if (std::is_same() && std::dynamic_pointer_cast(eltwise)) { + } else if (std::is_same() && std::dynamic_pointer_cast(eltwise)) { // Fuse: ConvolutionIE->Mul auto weights_shape = m_conv->input(1).get_shape(); Shape const_shape(weights_shape.size(), 1); const_shape[0] = weights_shape[0]; - auto const_reshape = std::make_shared(constant, + auto const_reshape = std::make_shared(constant, op::Constant::create(element::i64, Shape{const_shape.size()}, const_shape), true); - new_weights = std::make_shared (m_conv->input_value(1), const_reshape); + new_weights = std::make_shared (m_conv->input_value(1), const_reshape); if (m_conv->inputs().size() == 2) { new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights}); } else { - auto bias_reshape = std::make_shared(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true); - new_bias = std::make_shared(bias_reshape, constant); + auto bias_reshape = std::make_shared(constant, op::Constant::create(element::i64, Shape{1}, {weights_shape[0]}), true); + new_bias = std::make_shared(bias_reshape, constant); new_conv = m_conv->clone_with_new_inputs({m_conv->input_value(0), new_weights, new_bias}); } } else { return false; } - ngraph::copy_runtime_info({m_conv, eltwise}, new_conv); - new_conv->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::replace_node(m.get_match_root(), new_conv); + ngraph::copy_runtime_info({m_conv, eltwise}, new_conv.get_node_shared_ptr()); + new_conv.get_node_shared_ptr()->set_friendly_name(m.get_match_root()->get_friendly_name()); + ngraph::replace_node(m.get_match_root(), new_conv.get_node_shared_ptr()); return true; }; return callback; diff --git a/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2_tbl.hpp 
b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2_tbl.hpp index 796642e3488..c802dcb3f2a 100644 --- a/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2_tbl.hpp +++ b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2_tbl.hpp @@ -17,5 +17,6 @@ NGRAPH_PASS(ConvertBroadcast3, ::ngraph::pass) NGRAPH_PASS(ConvertNMS3, ::ngraph::pass) NGRAPH_PASS(ConvertShapeOf3, ::ngraph::pass) +NGRAPH_PASS(ConvertShuffleChannels3, ::ngraph::pass) NGRAPH_PASS(ConvertTopK3, ::ngraph::pass) diff --git a/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp new file mode 100644 index 00000000000..3c6920a096c --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +#include + +namespace ngraph { +namespace pass { + + class INFERENCE_ENGINE_API_CLASS(ConvertShuffleChannels3); + +} // namespace pass +} // namespace ngraph + +class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite { +public: + ConvertShuffleChannels3() : GraphRewrite() { + convert_shuffle_channels3(); + } + +private: + void convert_shuffle_channels3(); +}; diff --git a/inference-engine/src/transformations/src/ngraph_ops/convolution_ie.cpp b/inference-engine/src/transformations/src/ngraph_ops/convolution_ie.cpp index 75b397cce3d..f17bcd90060 100644 --- a/inference-engine/src/transformations/src/ngraph_ops/convolution_ie.cpp +++ b/inference-engine/src/transformations/src/ngraph_ops/convolution_ie.cpp @@ -54,43 +54,11 @@ op::ConvolutionIE::ConvolutionIE(const Output& data_batch, } void op::ConvolutionIE::validate_and_infer_types() { - const PartialShape& data_batch_pshape = get_input_partial_shape(0); + PartialShape data_batch_shape = get_input_partial_shape(0); element::Type data_batch_et = get_input_element_type(0); - const PartialShape& filters_pshape = get_input_partial_shape(1); + PartialShape filters_shape = get_input_partial_shape(1); element::Type filters_et = get_input_element_type(1); - PartialShape result_shape{PartialShape::dynamic()}; - - // we need to adjust filters_shape to reuse helpers for normal convolution - if (filters_pshape.is_static() && data_batch_pshape.is_static()) { - auto filters_shape = filters_pshape.to_shape(); - auto groups = m_group; - auto data_batch_shape = data_batch_pshape.to_shape(); - data_batch_shape[1] /= groups; - - if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) { - m_pads_begin.clear(); - m_pads_end.clear(); - infer_auto_padding( - data_batch_shape, - Shape(filters_shape.begin() + 2, filters_shape.end()), // Remove {O,I} - m_strides, - m_dilations, - m_auto_pad, - m_pads_end, - m_pads_begin); - } - - result_shape = - infer_convolution_forward(this, - data_batch_shape, - Strides(m_strides.size(), 1), // dummy data dilations - m_pads_begin, - m_pads_end, - filters_shape, - m_strides, - m_dilations); - } element::Type result_et; NODE_VALIDATION_CHECK( @@ -102,6 +70,45 @@ void op::ConvolutionIE::validate_and_infer_types() { filters_et, ")."); + PartialShape 
result_shape{PartialShape::dynamic()}; + + // In case if number of groups greater than 1 and channel dimension is dynamic we can't calculate output shape + if (m_group > 1) { + if (data_batch_shape.rank().is_dynamic() || data_batch_shape[1].is_dynamic()) { + set_output_type(0, result_et, result_shape); + return; + } else { + // Update channel dimension according to groups count + data_batch_shape[1] = data_batch_shape[1].get_length() / m_group; + } + } + + // we need to adjust filters_shape to reuse helpers for normal convolution + if (filters_shape.is_static() && data_batch_shape.is_static()) { + if (m_auto_pad == PadType::SAME_UPPER || m_auto_pad == PadType::SAME_LOWER) { + m_pads_begin.clear(); + m_pads_end.clear(); + auto filter_shape = filters_shape.to_shape(); + filter_shape.erase(filter_shape.begin(), filter_shape.begin() + 2); // Remove {O,I} + infer_auto_padding(data_batch_shape.to_shape(), + filter_shape, + m_strides, + m_dilations, + m_auto_pad, + m_pads_end, + m_pads_begin); + } + } + + result_shape = infer_convolution_forward(this, + data_batch_shape, + Strides(m_strides.size(), 1), // dummy data dilations + m_pads_begin, + m_pads_end, + filters_shape, + m_strides, + m_dilations); + set_output_type(0, result_et, result_shape); } diff --git a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_interpolate_to_interp_or_resample.cpp b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_interpolate_to_interp_or_resample.cpp index 936bc73bfa6..c68a3e2b52f 100644 --- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_interpolate_to_interp_or_resample.cpp +++ b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_interpolate_to_interp_or_resample.cpp @@ -47,10 +47,16 @@ void ngraph::pass::ConvertInterpolateToInterpOrResample::convert_interpolate_to_ interpolate_attrs.pads_end = std::vector{0}; std::vector useless_axes; - for (const auto & axis : interpolate_axes) - if (input_shape[axis] == out_spatial_shape[axis] && axis < 2) - // keeping only those not spatial dimensions that are going to be changed - useless_axes.push_back(axis); + size_t axis_idx = 0; + for (auto axis = 0; axis < input_shape.size(); ++axis) { + if (interpolate_axes.count(axis)) { + if (input_shape[axis] == out_spatial_shape[axis_idx] && axis < 2) + // keeping only those not spatial dimensions that are going to be changed + useless_axes.push_back(axis); + ++axis_idx; + } + } + std::reverse(useless_axes.begin(), useless_axes.end()); for (const auto & axis : useless_axes) { interpolate_axes.erase(axis); diff --git a/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.cpp b/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.cpp index 0e6af43fc44..5a27e031769 100644 --- a/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.cpp +++ b/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_opset3_to_opset2.cpp @@ -7,6 +7,7 @@ #include "transformations/convert_opset3_to_opset2/convert_broadcast3.hpp" #include "transformations/convert_opset3_to_opset2/convert_nms3.hpp" #include "transformations/convert_opset3_to_opset2/convert_shapeof3.hpp" +#include "transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp" #include 
"transformations/convert_opset3_to_opset2/convert_topk3.hpp" #include diff --git a/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.cpp b/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.cpp new file mode 100644 index 00000000000..ca66b192712 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.cpp @@ -0,0 +1,100 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp" + +#include +#include + +#include +#include +#include + +using namespace ngraph; + +void ngraph::pass::ConvertShuffleChannels3::convert_shuffle_channels3() { + auto input = std::make_shared(element::f32, Shape{1, 1, 1, 1}); + auto shuffle_channels = std::make_shared<::opset3::ShuffleChannels>(input); + + ngraph::graph_rewrite_callback callback = [](pattern::Matcher &m) { + auto shuffle_channels = std::dynamic_pointer_cast<::opset3::ShuffleChannels>(m.get_match_root()); + if (!shuffle_channels) { + return false; + } + if (shuffle_channels->input_value(0).get_partial_shape().rank().is_dynamic()) { + return false; + } + + auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}); + auto shuffle_axis = shuffle_channels->get_axis(); + int64_t shuffle_group = static_cast(shuffle_channels->get_group()); + int64_t input_rank = shuffle_channels->input_value(0).get_partial_shape().rank().get_length(); + auto original_shape = std::make_shared<::opset2::ShapeOf>(shuffle_channels->input_value(0)); + if (shuffle_axis < 0) { + shuffle_axis += input_rank; + } + + // calculate split sizes based on shuffle axis and avoid splits of size 0 + std::vector split_lengts; + if (shuffle_axis == 0) { + split_lengts = {1, input_rank - 1}; + } else if (shuffle_axis + 1 == input_rank) { + split_lengts = {input_rank - 1, 1}; + } else { + split_lengts = {shuffle_axis, 1, input_rank - shuffle_axis - 1}; + } + + // get input tensor dimensions divided into parts with help of VariadicSplit + auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>( + original_shape->output(0), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), + ::opset2::Constant::create(element::i64, Shape({split_lengts.size()}), split_lengts)); + + // calculate new dimension of the reshape. 
Start with two elements of {group, -1} + ::OutputVector new_dimensions = { + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{shuffle_group}), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1})}; + + // add more elements to the reshape output dimensions based on shuffle_axis + std::vector transpose_order; + if (shuffle_axis == 0) { + new_dimensions.push_back( + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(1), reduce_axis_const, true)); + transpose_order = {1, 0, 2}; + } else if (shuffle_axis + 1 == input_rank) { + new_dimensions.insert(new_dimensions.begin(), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), + reduce_axis_const, true)); + transpose_order = {0, 2, 1}; + } else { + new_dimensions.insert(new_dimensions.begin(), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), + reduce_axis_const, true)); + new_dimensions.push_back( + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)); + transpose_order = {0, 2, 1, 3}; + } + // reshape the tensor to a new shape + auto new_shape = std::make_shared<::opset2::Concat>(new_dimensions, 0); + auto reshape = std::make_shared<::opset2::Reshape>(shuffle_channels->input_value(0), new_shape, false); + // swap dimensions appearing after splitting the "shuffle_axis" dimension into two + auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), + ::opset2::Constant::create(element::i64, + Shape({transpose_order.size()}), + transpose_order)); + // restore original shape + auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false); + + ::NodeVector new_ops = {original_shape, split_input_dimensions, transpose, reshape, reshape_back, new_shape}; + for (auto output : new_dimensions) + new_ops.insert(new_ops.begin(), output.get_node_shared_ptr()); + reshape_back->set_friendly_name(shuffle_channels->get_friendly_name()); + ::copy_runtime_info(shuffle_channels, new_ops); + ::replace_node(shuffle_channels, reshape_back); + return true; + }; + + auto m = std::make_shared(shuffle_channels, "ConvertShuffleChannels3"); + this->add_matcher(m, callback, PassProperty::CHANGE_DYNAMIC_STATE); +} \ No newline at end of file diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp index 2692e67c1a9..2da46999685 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/graph_transformer.hpp @@ -125,6 +125,7 @@ struct CompilationConfig final { struct DataInfo final { std::unordered_map offset; + std::unordered_map descFromPlugin; int totalSize = 0; }; diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp index 4554fc6853d..83bd2408d8f 100644 --- a/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp +++ b/inference-engine/src/vpu/graph_transformer/include/vpu/model/model.hpp @@ -264,6 +264,7 @@ public: const Data& newChild); void disconnectDatas(const DataToDataAllocation& edge); + void disconnectDatas(const DataToShapeAllocation& edge); // // Nodes removal diff --git a/inference-engine/src/vpu/graph_transformer/src/backend/backend.cpp b/inference-engine/src/vpu/graph_transformer/src/backend/backend.cpp index 
475cf007d96..60537572c1f 100644 --- a/inference-engine/src/vpu/graph_transformer/src/backend/backend.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/backend/backend.cpp @@ -26,6 +26,7 @@ void BackEnd::extractDataInfo( auto ioBufferOffset = data->attrs().get("ioBufferOffset"); IE_ASSERT(ioBufferOffset + data->totalByteSize() <= inputInfo.totalSize); + inputInfo.descFromPlugin[data->name()] = data->desc().toTensorDesc(); inputInfo.offset[data->name()] = ioBufferOffset; } else if (DataUsage::Output == data->usage()) { IE_ASSERT(outputInfo.offset.count(data->name()) == 0); @@ -33,6 +34,7 @@ void BackEnd::extractDataInfo( auto ioBufferOffset = data->attrs().get("ioBufferOffset"); IE_ASSERT(ioBufferOffset + data->totalByteSize() <= outputInfo.totalSize); + outputInfo.descFromPlugin[data->name()] = data->desc().toTensorDesc(); outputInfo.offset[data->name()] = ioBufferOffset; } } diff --git a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/strided_slice.cpp b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/strided_slice.cpp index 87405114d92..d8a985db382 100644 --- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/strided_slice.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/strided_slice.cpp @@ -45,28 +45,39 @@ private: }; StridedSliceParams PassImpl::parseInputParams(const Stage& stage) { - const auto beginInput = stage->input(1); - const auto endInput = stage->input(2); - const auto num_input_dims = stage->input(0)->desc().numDims(); + const auto input = stage->input(0); + const auto beginInput = stage->input(1); + const auto endInput = stage->input(2); + const auto num_input_dims = input->desc().numDims(); StridedSliceParams params; IE_ASSERT(beginInput->content() != nullptr); IE_ASSERT(endInput->content() != nullptr); - auto vectorToDimValues = [](const std::vector& v) { - auto dims = DimsOrder::fromNumDims(v.size()).toIndices(); - int idx = v.size(); + const auto numpyIdxVectorToDimValues = [&input](const std::vector& values) { + auto dims = DimsOrder::fromNumDims(values.size()).toIndices(); + + // IE notation to GT notation + std::vector revertedValues(values.size()); + std::reverse_copy(values.begin(), values.end(), revertedValues.begin()); + + int idx = 0; for (auto& dim : dims) { - idx--; - dim.second = v[idx]; + auto value = revertedValues[idx++]; + if (value < 0) { + value = std::max(input->desc().dim(dim.first) + value + 1, 0); + } + value = std::min(input->desc().dim(dim.first), value); + dim.second = value; } + return dims; }; - params.begin = vectorToDimValues( + params.begin = numpyIdxVectorToDimValues( std::vector(beginInput->content()->get(), beginInput->content()->get() + beginInput->desc().dims().get(Dim::C, 0))); - params.end = vectorToDimValues( + params.end = numpyIdxVectorToDimValues( std::vector(endInput->content()->get(), endInput->content()->get() + endInput->desc().dims().get(Dim::C, 0))); @@ -74,11 +85,11 @@ StridedSliceParams PassImpl::parseInputParams(const Stage& stage) { if (stage->numInputs() == 4) { const auto stridesInput = stage->input(3); IE_ASSERT(stridesInput->content() != nullptr); - params.strides = vectorToDimValues( + params.strides = numpyIdxVectorToDimValues( std::vector(stridesInput->content()->get(), stridesInput->content()->get() + stridesInput->desc().dims().get(Dim::C, 0))); } else { - params.strides = vectorToDimValues(std::vector(num_input_dims, 1)); + params.strides = numpyIdxVectorToDimValues(std::vector(num_input_dims, 1)); } IE_ASSERT(params.begin.size() == 
num_input_dims); @@ -117,8 +128,8 @@ StridedSliceParams PassImpl::parseInputParams(const Stage& stage) { IE_ASSERT(c != '1') << "VPU doesn't support shrink_axis_mask for StridedSlice"; } - params.begin_mask = vectorToDimValues(begin_mask_values); - params.end_mask = vectorToDimValues(end_mask_values); + params.begin_mask = numpyIdxVectorToDimValues(begin_mask_values); + params.end_mask = numpyIdxVectorToDimValues(end_mask_values); return params; } @@ -135,20 +146,14 @@ StridedSliceInternalParams PassImpl::computeInternalParams(const Stage& stage, S m_params.strides_dms.set(dim, 1); } - auto clip = [](int value, int min, int max) { - return std::min(std::max(min, value), max); - }; - for (const auto& dim : input->desc().dimsOrder().toPermutation()) { m_params.strides_dms.set(dim, params.strides[dim]); IE_ASSERT(params.begin_mask[dim] == 1 || params.begin_mask[dim] == 0); IE_ASSERT(params.end_mask[dim] == 1 || params.end_mask[dim] == 0); - m_params.begin_dms.set(dim, - params.begin_mask[dim] ? clip(params.begin[dim], 0, input->desc().dim(dim)) : 0); - m_params.end_dms.set(dim, - params.end_mask[dim] ? clip(params.end[dim], 0, input->desc().dim(dim)) : input->desc().dim(dim)); + m_params.begin_dms.set(dim, params.begin_mask[dim] ? params.begin[dim] : 0); + m_params.end_dms.set(dim, params.end_mask[dim] ? params.end[dim] : input->desc().dim(dim)); IE_ASSERT(dim != Dim::N || numDims < 4 || m_params.strides_dms[dim] == 1) << "VPU doesn't support batch strides for StridedSlice"; @@ -296,6 +301,10 @@ void PassImpl::run(const Model& model) { input = intermediateOutputData; } + VPU_INTERNAL_CHECK(input->desc().dims() == output->desc().dims(), + "StridedSlice pass: result tensor dims (%v) must be equal to output " + "tensor dims (%v)", input->desc().dims(), output->desc().dims()); + _stageBuilder->addCopyStage( model, formatString("%s@copy-output", stage->name()), diff --git a/inference-engine/src/vpu/graph_transformer/src/model/model.cpp b/inference-engine/src/vpu/graph_transformer/src/model/model.cpp index c2a2ed9ea91..c4ca128daca 100644 --- a/inference-engine/src/vpu/graph_transformer/src/model/model.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/model/model.cpp @@ -1557,10 +1557,7 @@ void ModelObj::replaceDataToShapeParent( void ModelObj::replaceDataToShapeChild( const DataToShapeAllocation& edge, const Data& newChild) { - auto parent = edge->parent(); - auto oldChild = edge->child(); - - oldChild->_parentDataToShapeEdge = nullptr; + edge->_child->_parentDataToShapeEdge = nullptr; edge->_child = newChild; VPU_THROW_UNLESS(newChild->_parentDataToShapeEdge == nullptr, @@ -1634,6 +1631,17 @@ void ModelObj::disconnectDatas(const DataToDataAllocation& edge) { } } +void ModelObj::disconnectDatas(const DataToShapeAllocation& edge) { + auto parent = edge->parent(); + auto child = edge->child(); + + child->_parentDataToShapeEdge = nullptr; + parent->_childDataToShapeEdges.erase(edge); + + IE_ASSERT(edge->_ptrPosInModel != _shapeEdgePtrList.end()); + _shapeEdgePtrList.erase(edge->_ptrPosInModel); +} + void ModelObj::disconnectStage(const Stage& stage) { // // Check that objects belong to the same Model. 
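Editorial note: the new ModelObj::disconnectDatas(DataToShapeAllocation) overload above tears a data-to-shape edge down in three steps: the child data forgets its parent edge, the parent drops the edge from its child set, and the model erases the edge from the list it owns. The condensed sketch below restates that sequence with simplified stand-in types; it is not the actual vpu model classes.

// Condensed sketch (assumed simplified types) of disconnecting a data-to-shape edge.
#include <cassert>
#include <list>
#include <set>

struct DataObj;

struct ShapeEdge {
    DataObj* parent = nullptr;
    DataObj* child = nullptr;
    std::list<ShapeEdge*>::iterator posInModel;   // mirrors _ptrPosInModel
};

struct DataObj {
    ShapeEdge* parentDataToShapeEdge = nullptr;   // mirrors _parentDataToShapeEdge
    std::set<ShapeEdge*> childDataToShapeEdges;   // mirrors _childDataToShapeEdges
};

struct ModelSketch {
    std::list<ShapeEdge*> shapeEdgeList;          // mirrors _shapeEdgePtrList

    void disconnectDatas(ShapeEdge* edge) {
        edge->child->parentDataToShapeEdge = nullptr;      // child is no longer shape-bound
        edge->parent->childDataToShapeEdges.erase(edge);    // parent drops the edge
        assert(edge->posInModel != shapeEdgeList.end());
        shapeEdgeList.erase(edge->posInModel);              // model releases the edge
    }
};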
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/dynamic_shape_resolver.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/dynamic_shape_resolver.cpp index cb95807a951..9bca6c756a2 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/dynamic_shape_resolver.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/dynamic_shape_resolver.cpp @@ -34,8 +34,15 @@ void FrontEnd::parseDSR(const Model& model, const ie::CNNLayerPtr& layer, const layer->name, layer->type, 1, shape->name()); model->replaceStageOutput(dataProducerEdge, dataOutput); - if (const auto& dataToShapeEdge = data->parentDataToShapeEdge()) { - model->replaceDataToShapeChild(dataToShapeEdge, dataOutput); + if (auto dataToShapeEdge = data->parentDataToShapeEdge()) { + const auto& parent = dataToShapeEdge->parent(); + VPU_THROW_UNLESS(parent == shape, "Myriad plugin encountered layer of type \"{}\" and name \"{}\" with input #{} (data input with name \"{}\") that " + "already has parent in terms of data to shape connection. The parent is expected to be input #{} (shape input with name \"{}\") of the layer, so " + "it's a \"{}\" with already connected inputs, but actual parent is other data object with name \"{}\". The case of connected inputs is considered " + "as \"{}\" that goes directly to \"{}\" as a result of some optimization (operation between them has been optimized out). Other cases, when some " + "input already has a connection, but with other data object are prohibited.", + layer->type, layer->name, 0, data->name(), 1, shape->name(), layer->type, parent->name(), layer->type, layer->type); + model->disconnectDatas(dataToShapeEdge); } model->removeUnusedData(data); diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/eltwise.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/eltwise.cpp index 41d4b75c15b..a17d6e0660e 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/eltwise.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/eltwise.cpp @@ -137,10 +137,12 @@ private: void initialCheckImpl() const override { const auto& operation = type(); const auto& dataTypeInput0 = input(0)->desc().type(); + const auto& dataTypeOutput = output(0)->desc().type(); { auto supportedDataTypesInput0 = EnumSet{DataType::FP16}; - if (operation == StageType::Sum || operation == StageType::Greater_equal || operation == StageType::Select || + if (operation == StageType::Sum || operation == StageType::Greater_equal || + operation == StageType::Equal || operation == StageType::Select || operation == StageType::Prod || operation == StageType::Max) { supportedDataTypesInput0.insert(DataType::S32); } @@ -150,16 +152,18 @@ private: static_cast>(this), dataTypeInput0, supportedDataTypesInput0); } - if (operation != StageType::Select || dataTypeInput0 == DataType::FP16) { - assertInputsOutputsTypes(this, {{dataTypeInput0}, {dataTypeInput0}, {dataTypeInput0}}, {{dataTypeInput0}}); - } else { + if (operation == StageType::Select && dataTypeInput0 == DataType::S32) { auto supportedDataTypesInput1 = EnumSet{DataType::FP16, DataType::S32}; const auto& dataTypeInput1 = input(1)->desc().type(); VPU_THROW_UNLESS(supportedDataTypesInput1.count(dataTypeInput1) != 0, - "Stage node %v types check error: input #1 has type %v, but one of %v is expected", - static_cast>(this), dataTypeInput1, supportedDataTypesInput1); + "Stage node %v types check error: input #1 has type %v, but one of %v is expected", + static_cast>(this), dataTypeInput1, supportedDataTypesInput1); 
assertInputsOutputsTypes(this, {{dataTypeInput0}, {dataTypeInput1}, {dataTypeInput1}}, {{dataTypeInput1}}); + } else if (operation == StageType::Greater && dataTypeInput0 != dataTypeOutput) { + assertInputsOutputsTypes(this, {{DataType::FP16}, {DataType::FP16}, {DataType::FP16}}, {{DataType::S32}}); + } else { + assertInputsOutputsTypes(this, {{dataTypeInput0}, {dataTypeInput0}, {dataTypeInput0}}, {{dataTypeInput0}}); } } diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/out_shape_of_reshape.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/out_shape_of_reshape.cpp index 66d794caabf..25a2cb61584 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/out_shape_of_reshape.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/out_shape_of_reshape.cpp @@ -96,7 +96,7 @@ void FrontEnd::parseOutShapeOfReshape( inputs, outputs); - auto specialZero = layer->GetParamAsInt("special_zero", 0); + auto specialZero = layer->GetParamAsBool("special_zero", false); outShapeOfReshapeStage->attrs().set("specialZero", specialZero); } diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp index 76978c89e32..ea29b5b43cd 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.cpp @@ -22,6 +22,7 @@ namespace vpu { namespace MyriadPlugin { ExecutableNetwork::ExecutableNetwork( + std::shared_ptr mvnc, std::vector& devicePool, const MyriadConfig& config) : _config(config) { @@ -32,7 +33,7 @@ ExecutableNetwork::ExecutableNetwork( _config.logLevel(), defaultOutput(_config.pluginLogFilePath())); - _executor = std::make_shared(_config.forceReset(), _config.logLevel(), _log); + _executor = std::make_shared(_config.forceReset(), std::move(mvnc), _config.logLevel(), _log); _device = _executor->openDevice(devicePool, _config); const auto& compileConfig = config.compileConfig(); @@ -49,9 +50,11 @@ ExecutableNetwork::ExecutableNetwork( } ExecutableNetwork::ExecutableNetwork( - ICNNNetwork& network, std::vector& devicePool, + ICNNNetwork& network, + std::shared_ptr mvnc, + std::vector& devicePool, const MyriadConfig& config) : - ExecutableNetwork(devicePool, config) { + ExecutableNetwork(std::move(mvnc), devicePool, config) { VPU_PROFILE(ExecutableNetwork); const auto compilerLog = std::make_shared( @@ -141,18 +144,20 @@ void ExecutableNetwork::Import(std::istream& strm, } ExecutableNetwork::ExecutableNetwork(std::istream& strm, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config) : - ExecutableNetwork(devicePool, config) { + ExecutableNetwork(std::move(mvnc), devicePool, config) { VPU_PROFILE(ExecutableNetwork); Import(strm, devicePool, config); } ExecutableNetwork::ExecutableNetwork( const std::string& blobFilename, + std::shared_ptr mvnc, std::vector& devicePool, const MyriadConfig& config) : - ExecutableNetwork(devicePool, config) { + ExecutableNetwork(std::move(mvnc), devicePool, config) { VPU_PROFILE(ExecutableNetwork); std::ifstream blobFile{blobFilename, std::ios::binary}; Import(blobFile, devicePool, config); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h index 2e03ea02f9e..1e106c06cbc 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executable_network.h @@ -33,14 +33,17 @@ 
public: typedef std::shared_ptr Ptr; explicit ExecutableNetwork(InferenceEngine::ICNNNetwork &network, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config); explicit ExecutableNetwork(std::istream& strm, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config); explicit ExecutableNetwork(const std::string &blobFilename, + std::shared_ptr mvnc, std::vector &devicePool, const MyriadConfig& config); @@ -126,8 +129,9 @@ private: const size_t _maxTaskExecutorGetResultCount = 1; std::queue _taskExecutorGetResultIds; - ExecutableNetwork(std::vector &devicePool, - const MyriadConfig& config); + ExecutableNetwork(std::shared_ptr mvnc, + std::vector &devicePool, + const MyriadConfig& config); InferenceEngine::ITaskExecutor::Ptr getNextTaskExecutor() { std::string id = _taskExecutorGetResultIds.front(); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp index f553c18edcb..9b7b96cfabd 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executor.cpp @@ -36,9 +36,10 @@ using namespace vpu; static std::mutex device_mutex; -MyriadExecutor::MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log) { +MyriadExecutor::MyriadExecutor(bool forceReset, std::shared_ptr mvnc, + const LogLevel& vpuLogLevel, const Logger::Ptr& log) : _log(log), _mvnc(std::move(mvnc)) { VPU_PROFILE(MyriadExecutor); - _mvnc = std::make_shared(); + VPU_THROW_UNLESS(_mvnc, "mvnc is null"); int ncResetAll = forceReset; auto status = ncGlobalSetOption(NC_RW_RESET_ALL, &ncResetAll, sizeof(ncResetAll)); if (status != NC_OK) { @@ -136,12 +137,17 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, return statusOpen; } + ncDeviceOpenParams_t deviceOpenParams = {}; + deviceOpenParams.watchdogHndl = _mvnc->watchdogHndl(); + deviceOpenParams.watchdogInterval = config.watchdogInterval().count(); + deviceOpenParams.customFirmwareDirectory = dirName.c_str(); + // Open new device with specific path to FW folder statusOpen = ncDeviceOpen(&device._deviceHandle, - in_deviceDesc, config.watchdogInterval().count(), dirName.c_str()); + in_deviceDesc, deviceOpenParams); if (statusOpen != NC_OK) { - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return statusOpen; } @@ -154,7 +160,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&device._platform), &dataLength); if (status != NC_OK || dataLength != sizeof(device._platform)) { _log->warning("Failed to get device platform"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? status : NC_ERROR; // for dataLength error } @@ -163,7 +169,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&device._protocol), &dataLength); if (status != NC_OK || dataLength != sizeof(device._protocol)) { _log->warning("Failed to get device protocol"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? 
status : NC_ERROR; // for dataLength error } @@ -173,7 +179,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&device._maxGraphNum), &dataLength); if (status != NC_OK || dataLength != sizeof(device._maxGraphNum)) { _log->warning("Failed to get maximum supported number of graphs"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? status : NC_ERROR; // for dataLength error } @@ -184,7 +190,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, reinterpret_cast(&deviceName), &dataLength); if (status != NC_OK || dataLength > NC_MAX_NAME_SIZE) { _log->warning("Failed to get name of booted device"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status != NC_OK ? status : NC_ERROR; // for dataLength error } else { device._name = deviceName; @@ -194,7 +200,7 @@ ncStatus_t MyriadExecutor::bootNextDevice(std::vector &devicePool, if (status != NC_OK) { _log->warning("Failed to set configuration for Power Manager"); - ncDeviceClose(&device._deviceHandle); + ncDeviceClose(&device._deviceHandle, _mvnc->watchdogHndl()); return status; } @@ -283,12 +289,12 @@ VPU_PACKED(bin_header { uint32_t frequency; };) -void MyriadExecutor::closeDevices(std::vector &devicePool) { +void MyriadExecutor::closeDevices(std::vector &devicePool, std::shared_ptr mvnc) { VPU_PROFILE(closeDevices); std::lock_guard lock(device_mutex); for (auto &device : devicePool) { if (device->_deviceHandle != nullptr) { - auto res = ncDeviceClose(&(device->_deviceHandle)); + auto res = ncDeviceClose(&(device->_deviceHandle), mvnc->watchdogHndl()); if (res != NC_OK) printf("ncDeviceClose failed (%d)\n", static_cast(res)); device->_deviceHandle = nullptr; diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_executor.h b/inference-engine/src/vpu/myriad_plugin/myriad_executor.h index 9aa0d898888..d231c45b443 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_executor.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_executor.h @@ -78,7 +78,8 @@ class MyriadExecutor { unsigned int _numStages = 0; public: - MyriadExecutor(bool forceReset, const LogLevel& vpuLogLevel, const Logger::Ptr& log); + MyriadExecutor(bool forceReset, std::shared_ptr mvnc, + const LogLevel& vpuLogLevel, const Logger::Ptr& log); ~MyriadExecutor() = default; /** @@ -87,7 +88,7 @@ public: */ DevicePtr openDevice(std::vector &devicePool, const MyriadConfig& config); - static void closeDevices(std::vector &devicePool); + static void closeDevices(std::vector &devicePool, std::shared_ptr mvnc); void allocateGraph(DevicePtr &device, GraphDesc &graphDesc, diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp index 1edaa94a823..e69167c800e 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_infer_request.cpp @@ -141,6 +141,42 @@ void MyriadInferRequest::InferAsync() { _inputInfo.totalSize, nullptr, 0); } +static void copyBlobAccordingUpperBound( + const Blob::Ptr& in, + const Blob::Ptr& out) { + const auto inLayout = in->getTensorDesc().getLayout(); + const auto outLayout = out->getTensorDesc().getLayout(); + + const auto& inDims = in->getTensorDesc().getDims(); + const auto& outDims = out->getTensorDesc().getDims(); + + IE_ASSERT(inLayout == outLayout); + + auto inPtr = in->cbuffer().as(); + IE_ASSERT(inPtr 
!= nullptr); + + auto outPtr = out->cbuffer().as(); + IE_ASSERT(outPtr != nullptr); + + if (inDims.size() == 1) { + std::copy_n( + in->cbuffer().as(), + in->byteSize(), + out->buffer().as()); + } else if (inDims.size() == 2) { + size_t inLineSize = inDims[1] * in->element_size(); + size_t outLineSize = outDims[1] * out->element_size(); + for (size_t n = 0; n < outDims[0]; n++) { + std::copy_n( + in->cbuffer().as() + n * inLineSize, + outLineSize, + out->buffer().as() + n * outLineSize); + } + } else { + VPU_THROW_EXCEPTION << "Copying of blobs with dynamic shape and num dims greater than 2 unsupported yet"; + } +} + void MyriadInferRequest::GetResult() { VPU_PROFILE(GetResult); @@ -184,23 +220,50 @@ void MyriadInferRequest::GetResult() { const auto& ieOutDesc = ieBlob->getTensorDesc(); const auto& ieOutPrc = ieOutDesc.getPrecision(); + auto ieOutDims = ieOutDesc.getDims(); + // Eject dynamic output shape (suffix "@shape") and copy it to vector of dimensions in reverse order const auto& shapeInfo = _outputInfo.offset.find(ieBlobName + "@shape"); + // if (isDynamic) if (shapeInfo != _outputInfo.offset.end()) { - const auto shapeOffset = resultOffset(shapeInfo->first); - const auto shapePtr = reinterpret_cast(resultBuffer.data() + shapeOffset); + auto outData = networkOutputs[ieBlobName]; + const auto& descFromPlugin = _outputInfo.descFromPlugin.find(ieBlobName); + VPU_THROW_UNLESS(descFromPlugin != _outputInfo.descFromPlugin.end(), + "Can not find tensor descriptor by plugin for {} output", ieBlobName); + const auto& dynOutputDesc = descFromPlugin->second; - const auto shapeRank = ieOutDims.size(); + if (ieBlob->getTensorDesc().getLayout() != dynOutputDesc.getLayout()) { + ieBlob->deallocate(); + ieBlob->getTensorDesc().reshape(dynOutputDesc.getDims(), dynOutputDesc.getLayout()); + ieBlob->allocate(); + outData->reshape(dynOutputDesc.getDims(), dynOutputDesc.getLayout()); + } + + const auto shapeResultOffset = resultOffset(shapeInfo->first); + const auto shapePtr = reinterpret_cast(resultBuffer.data() + shapeResultOffset); + + auto shapeRank = dynOutputDesc.getDims().size(); + ieOutDims.resize(shapeRank); for (size_t idx = 0; idx < shapeRank; ++idx) { ieOutDims[idx] = shapePtr[shapeRank - idx - 1]; } - } - // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called - const auto tempTensorDesc = ie::TensorDesc{ieOutPrc, ieOutDims, getVpuLayout(ieBlobName)}; - const auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset(ieBlobName)); - copyBlob(tmpBlob, ieBlob); + outData->setDims(ieOutDims); + ieBlob->getTensorDesc().setDims(ieOutDims); + + // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called + const auto tempTensorDesc = ie::TensorDesc{ieOutPrc, dynOutputDesc.getDims(), dynOutputDesc.getLayout()}; + const auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset(ieBlobName)); + + copyBlobAccordingUpperBound(tmpBlob, ieBlob); + } else { + // TODO: TensorDesc doesn't update internal BlockingDesc and strides when setLayout is called + const auto tempTensorDesc = ie::TensorDesc{ieOutPrc, ieOutDims, getVpuLayout(ieBlobName)}; + const auto tmpBlob = make_blob_with_precision(tempTensorDesc, resultBuffer.data() + resultOffset(ieBlobName)); + + copyBlob(tmpBlob, ieBlob); + } } } diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp index 66cf18cb933..5ba91ef4f86 100644 --- 
a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.cpp @@ -11,6 +11,17 @@ using namespace vpu::MyriadPlugin; // Implementation of methods of class Mvnc //------------------------------------------------------------------------------ +Mvnc::Mvnc() { + WatchdogHndl_t* watchdogHndl = nullptr; + if (watchdog_create(&watchdogHndl) != WD_ERRNO) { + THROW_IE_EXCEPTION << "Cannot create watchdog."; + } + + m_watcdogPtr = WatchdogUniquePtr(watchdogHndl, [](WatchdogHndl_t* watchdogHndl) { + watchdog_destroy(watchdogHndl); + }); +} + std::vector Mvnc::AvailableDevicesDesc() const { int deviceCount = 0; std::vector availableDevices(NC_MAX_DEVICES); diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h index d0b1c602add..43fcaed69d6 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_mvnc_wraper.h @@ -4,15 +4,19 @@ #pragma once +#include +#include + #include #include #include #include -#include namespace vpu { namespace MyriadPlugin { +using WatchdogUniquePtr = std::unique_ptr>; + //------------------------------------------------------------------------------ // class IMvnc // This is a class interface for accessing devices. @@ -24,6 +28,8 @@ public: virtual std::vector AvailableDevicesDesc() const = 0; virtual std::vector AvailableDevicesNames() const = 0; + virtual WatchdogHndl_t* watchdogHndl() = 0; + // Destructor virtual ~IMvnc() = default; }; @@ -35,9 +41,19 @@ public: class Mvnc : public IMvnc { public: + Mvnc(); + ~Mvnc() override = default; + // Operations std::vector AvailableDevicesDesc() const override; std::vector AvailableDevicesNames() const override; + + WatchdogHndl_t* watchdogHndl() override { + return m_watcdogPtr.get(); + } + +private: + WatchdogUniquePtr m_watcdogPtr; }; } // namespace MyriadPlugin diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp index 146c93cc433..1d30a7566b4 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp @@ -43,7 +43,7 @@ ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl( vpu::DynamicToStaticShape().transform(function); } - return std::make_shared(*clonedNetwork, _devicePool, parsedConfigCopy); + return std::make_shared(*clonedNetwork, _mvnc, _devicePool, parsedConfigCopy); } void Engine::SetConfig(const std::map &config) { @@ -103,9 +103,7 @@ void Engine::QueryNetwork( Engine::Engine(std::shared_ptr mvnc) : _mvnc(std::move(mvnc)), _metrics(std::make_shared()) { - if (!_mvnc) { - THROW_IE_EXCEPTION << "mvnc is invalid"; - } + VPU_THROW_UNLESS(_mvnc, "mvnc is null"); _pluginName = "MYRIAD"; @@ -134,7 +132,7 @@ InferenceEngine::ExecutableNetwork Engine::ImportNetwork( const auto executableNetwork = std::make_shared( - model, _devicePool, parsedConfigCopy); + model, _mvnc, _devicePool, parsedConfigCopy); return InferenceEngine::ExecutableNetwork{IExecutableNetwork::Ptr( new ExecutableNetworkBase(executableNetwork), diff --git a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h index 7567a5fde69..3d0d4eb9c14 100644 --- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h +++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.h @@ -22,7 +22,7 @@ public: explicit Engine(std::shared_ptr mvnc); ~Engine() 
override { - MyriadExecutor::closeDevices(_devicePool); + MyriadExecutor::closeDevices(_devicePool, _mvnc); } void SetConfig(const std::map& config) override; diff --git a/inference-engine/tests/functional/inference_engine/transformations/conv_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/conv_fusion_test.cpp new file mode 100644 index 00000000000..a31a2c3d7a6 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/conv_fusion_test.cpp @@ -0,0 +1,167 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_common.hpp" +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ngraph_test_utils.hpp" + +using namespace testing; + +using InputShape = ngraph::PartialShape; +using WeightsShape = ngraph::Shape; +using EltwiseType = ngraph::NodeTypeInfo; +using EltwiseShape = ngraph::Shape; +using IsNegative = bool; + +class ConvFusionTests: public CommonTestUtils::TestsCommon, + public testing::WithParamInterface > { +public: + std::shared_ptr f, f_ref; + + void SetUp() override { + const auto& input_shape = std::get<0>(GetParam()); + const auto& weights_shape = std::get<1>(GetParam()); + const auto& eltwise_type = std::get<2>(GetParam()); + const auto& eltwise_shape = std::get<3>(GetParam()); + const auto& is_negative = std::get<4>(GetParam()); + + f = get_initial_function(input_shape, weights_shape, eltwise_type, eltwise_shape); + + if (is_negative) { + f_ref = get_initial_function(input_shape, weights_shape, eltwise_type, eltwise_shape); + } else { + f_ref = get_reference_function(input_shape, weights_shape, eltwise_type, eltwise_shape); + } + } + +private: + std::shared_ptr get_initial_function(const InputShape& input_shape, + const WeightsShape& weights_shape, + const EltwiseType& eltwise_type, + const EltwiseShape& eltwise_shape) { + auto spatial_dims = input_shape.rank().get_length() - 2; + auto input = std::make_shared(ngraph::element::f32, input_shape); + auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1}); + auto conv = std::make_shared(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1), + ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0)); + + auto const_node = ngraph::opset1::Constant::create(ngraph::element::f32, eltwise_shape, {1.1}); + ngraph::Output eltwise; + if (eltwise_type == ngraph::opset1::Add::type_info) { + eltwise = std::make_shared(conv, const_node); + } else if (eltwise_type == ngraph::opset1::Multiply::type_info) { + eltwise = std::make_shared(conv, const_node); + } else { + throw ngraph::ngraph_error("Unsupported element type"); + } + + return std::make_shared(ngraph::NodeVector{eltwise.get_node_shared_ptr()}, ngraph::ParameterVector{input}); + } + + std::shared_ptr get_reference_function(const InputShape& input_shape, + const WeightsShape& weights_shape, + const EltwiseType& eltwise_type, + const EltwiseShape& eltwise_shape) { + auto spatial_dims = input_shape.rank().get_length() - 2; + auto input = std::make_shared(ngraph::element::f32, input_shape); + ngraph::Output weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1}); + ngraph::Output conv = std::make_shared(input, weights, ngraph::Strides(spatial_dims, 1), + ngraph::Strides(spatial_dims, 1), ngraph::CoordinateDiff(spatial_dims, 0), 
ngraph::CoordinateDiff(spatial_dims, 0)); + + ngraph::Output const_node; + const_node = ngraph::opset1::Constant::create(ngraph::element::f32, eltwise_shape, {1.1}); + if (eltwise_type == ngraph::opset1::Add::type_info) { + if (eltwise_shape.size() != 1) { + const_node = ngraph::op::util::reshapeTo(const_node, ngraph::Shape{ngraph::shape_size(eltwise_shape)}); + } + conv = conv.get_node_shared_ptr()->copy_with_new_inputs({input, weights, const_node}); + } else if (eltwise_type == ngraph::opset1::Multiply::type_info) { + if (eltwise_shape.size() > 1) { + const_node = ngraph::op::util::reshapeTo(const_node, ngraph::Shape{ngraph::shape_size(eltwise_shape)}); + } + ngraph::Shape const_shape(weights_shape.size(), 1); + const_shape[0] = weights_shape[0]; + weights = std::make_shared(weights, ngraph::op::util::reshapeTo(const_node, const_shape)); + conv = conv.get_node_shared_ptr()->copy_with_new_inputs({input, weights}); + } else { + throw ngraph::ngraph_error("Unsupported element type"); + } + + return std::make_shared(ngraph::NodeVector{conv.get_node_shared_ptr()}, ngraph::ParameterVector{input}); + } +}; + +TEST_P(ConvFusionTests, CompareFunctions) { + ngraph::pass::InitNodeInfo().run_on_function(f); + ngraph::pass::ConvFusion().run_on_function(f); + f->validate_nodes_and_infer_types(); + // ASSERT_NO_THROW(check_rt_info(f)); + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +using add = ngraph::opset1::Add; +using mul = ngraph::opset1::Multiply; + +INSTANTIATE_TEST_CASE_P(ConvAddFusion, ConvFusionTests, + testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{8, 1, 1, 1}, false), + std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{8, 1, 1, 1}, false), + std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, add::type_info, EltwiseShape{9, 1, 1, 1}, false), + std::make_tuple(InputShape{3, 3, DYN, 64, 64}, WeightsShape{6, 3, 3, 4, 2}, add::type_info, EltwiseShape{6, 1, 1, 1}, false), + std::make_tuple(InputShape{3, 3, 64, DYN, 64}, WeightsShape{5, 3, 3, 4, 3}, add::type_info, EltwiseShape{5, 1, 1, 1}, false), + std::make_tuple(InputShape{3, 3, 64, 64, DYN}, WeightsShape{5, 3, 3, 4, 3}, add::type_info, EltwiseShape{5, 1, 1, 1}, false), + std::make_tuple(InputShape{1, 3, 64, 64}, WeightsShape{6, 3, 1, 1}, add::type_info, EltwiseShape{6, 1, 1}, false), + std::make_tuple(InputShape{DYN, DYN, DYN, DYN}, WeightsShape{7, 3, 1, 1}, add::type_info, EltwiseShape{7, 1, 1}, false), + std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2}, add::type_info, EltwiseShape{8, 1, 1}, false), + std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3}, add::type_info, EltwiseShape{9, 1, 1}, false), + std::make_tuple(InputShape{3, 3, DYN, 64}, WeightsShape{6, 3, 3, 4}, add::type_info, EltwiseShape{6, 1, 1}, false), + std::make_tuple(InputShape{3, 3, 64, DYN}, WeightsShape{5, 3, 3, 4}, add::type_info, EltwiseShape{5, 1, 1}, false), + std::make_tuple(InputShape{DYN, DYN, DYN}, WeightsShape{5, 3, 1}, add::type_info, EltwiseShape{5, 1}, false), + std::make_tuple(InputShape{DYN, 3, 10}, WeightsShape{3, 3, 1}, add::type_info, EltwiseShape{3, 1}, false), + std::make_tuple(InputShape{2, DYN, 9}, WeightsShape{2, 3, 2}, add::type_info, EltwiseShape{2, 1}, false), + std::make_tuple(InputShape{3, 3, DYN}, WeightsShape{1, 3, 3}, add::type_info, EltwiseShape{1, 1}, false))); + 
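Editorial note: the reference functions above encode the algebraic identity the fusion test checks: a per-channel Add after ConvolutionIE becomes the convolution bias input, and a per-channel Multiply folds into the per-output-channel weights. The toy check below verifies the Multiply case numerically in plain C++ with no ngraph dependency; the shapes and values are arbitrary, and the epsilon comparison only sidesteps float rounding.

// Numerical check, on a toy 1x1 convolution with two output channels, of the
// identity behind ConvMulFusion: scaling the convolution output per channel equals
// pre-scaling each output-channel filter of the weights.
#include <array>
#include <cassert>
#include <cmath>

int main() {
    // input: 3 channels, single 1x1 spatial position
    const std::array<float, 3> input = {1.0f, 2.0f, 3.0f};
    // weights: 2 output channels x 3 input channels (1x1 kernel)
    const std::array<std::array<float, 3>, 2> weights = {{{0.5f, 1.0f, -1.0f},
                                                          {2.0f, 0.0f, 1.0f}}};
    const std::array<float, 2> scale = {1.1f, -0.25f};  // per-output-channel Multiply

    for (size_t oc = 0; oc < 2; ++oc) {
        float conv = 0.0f, conv_scaled_weights = 0.0f;
        for (size_t ic = 0; ic < 3; ++ic) {
            conv += input[ic] * weights[oc][ic];
            conv_scaled_weights += input[ic] * (weights[oc][ic] * scale[oc]);
        }
        // Conv(x, w) * c[oc]  ==  Conv(x, w * c[oc])
        assert(std::fabs(conv * scale[oc] - conv_scaled_weights) < 1e-5f);
    }
    return 0;
}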
+INSTANTIATE_TEST_CASE_P(DISABLED_ConvAddFusionNegative, ConvFusionTests, + testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{2, 1}, true), + std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, add::type_info, EltwiseShape{8, 1, 1}, true), + std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, add::type_info, EltwiseShape{9, 1, 1, 1, 1}, true))); + +INSTANTIATE_TEST_CASE_P(ConvMulFusion, ConvFusionTests, + testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{8, 1, 1, 1}, false), + std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{8, 1, 1, 1}, false), + std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, mul::type_info, EltwiseShape{9, 1, 1, 1}, false), + std::make_tuple(InputShape{3, 3, DYN, 64, 64}, WeightsShape{6, 3, 3, 4, 2}, mul::type_info, EltwiseShape{6, 1, 1, 1}, false), + std::make_tuple(InputShape{3, 3, 64, DYN, 64}, WeightsShape{5, 3, 3, 4, 3}, mul::type_info, EltwiseShape{5, 1, 1, 1}, false), + std::make_tuple(InputShape{3, 3, 64, 64, DYN}, WeightsShape{5, 3, 3, 4, 3}, mul::type_info, EltwiseShape{5, 1, 1, 1}, false), + std::make_tuple(InputShape{1, 3, 64, 64}, WeightsShape{6, 3, 1, 1}, mul::type_info, EltwiseShape{6, 1, 1}, false), + std::make_tuple(InputShape{DYN, DYN, DYN, DYN}, WeightsShape{7, 3, 1, 1}, mul::type_info, EltwiseShape{7, 1, 1}, false), + std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2}, mul::type_info, EltwiseShape{8, 1, 1}, false), + std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3}, mul::type_info, EltwiseShape{9, 1, 1}, false), + std::make_tuple(InputShape{3, 3, DYN, 64}, WeightsShape{6, 3, 3, 4}, mul::type_info, EltwiseShape{6, 1, 1}, false), + std::make_tuple(InputShape{3, 3, 64, DYN}, WeightsShape{5, 3, 3, 4}, mul::type_info, EltwiseShape{5, 1, 1}, false), + std::make_tuple(InputShape{DYN, DYN, DYN}, WeightsShape{5, 3, 1}, mul::type_info, EltwiseShape{5, 1}, false), + std::make_tuple(InputShape{DYN, 3, 10}, WeightsShape{3, 3, 1}, mul::type_info, EltwiseShape{3, 1}, false), + std::make_tuple(InputShape{2, DYN, 9}, WeightsShape{2, 3, 2}, mul::type_info, EltwiseShape{2, 1}, false), + std::make_tuple(InputShape{3, 3, DYN}, WeightsShape{1, 3, 3}, mul::type_info, EltwiseShape{1, 1}, false))); + +INSTANTIATE_TEST_CASE_P(DISABLED_ConvMulFusionNegative, ConvFusionTests, + testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{2, 1}, true), + std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2, 3}, mul::type_info, EltwiseShape{8, 1, 1}, true), + std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3, 1}, mul::type_info, EltwiseShape{9, 1, 1, 1, 1}, true))); diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_convolution_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_convolution_test.cpp new file mode 100644 index 00000000000..2f2f50e0be7 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_convolution_test.cpp @@ -0,0 +1,95 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_common.hpp" +#include +#include +#include +#include +#include +#include + 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ngraph_test_utils.hpp" + +using namespace testing; + +using InputShape = ngraph::PartialShape; +using WeightsShape = ngraph::Shape; + +class ConvertConvolutionsTest: public CommonTestUtils::TestsCommon, + public testing::WithParamInterface > { +public: + std::shared_ptr f, f_ref; + + void SetUp() override { + const auto& input_shape = std::get<0>(GetParam()); + const auto& weights_shape = std::get<1>(GetParam()); + + f = get_initial_function(input_shape, weights_shape); + f_ref = get_reference_function(input_shape, weights_shape); + } + +private: + std::shared_ptr get_initial_function(const ngraph::PartialShape & input_shape, + const ngraph::Shape & weights_shape) { + auto spatial_dims = input_shape.rank().get_length() - 2; + auto input = std::make_shared(ngraph::element::f32, input_shape); + auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1}); + auto conv = std::make_shared(input, weights, ngraph::Strides(spatial_dims, 1), + ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0), ngraph::Strides(spatial_dims, 1)); + + return std::make_shared(ngraph::NodeVector{conv}, ngraph::ParameterVector{input}); + } + + std::shared_ptr get_reference_function(const ngraph::PartialShape & input_shape, + const ngraph::Shape & weights_shape) { + auto spatial_dims = input_shape.rank().get_length() - 2; + auto input = std::make_shared(ngraph::element::f32, input_shape); + auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, weights_shape, {1}); + auto conv = std::make_shared(input, weights, ngraph::Strides(spatial_dims, 1), ngraph::Strides(spatial_dims, 1), + ngraph::CoordinateDiff(spatial_dims, 0), ngraph::CoordinateDiff(spatial_dims, 0)); + + return std::make_shared(ngraph::NodeVector{conv}, ngraph::ParameterVector{input}); + } +}; + +TEST_P(ConvertConvolutionsTest, CompareFunctions) { + const auto & orig_shape = f->get_output_partial_shape(0); + ngraph::pass::InitNodeInfo().run_on_function(f); + ngraph::pass::ConvertConvolutions().run_on_function(f); + ASSERT_NO_THROW(check_rt_info(f)); + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + ASSERT_TRUE(orig_shape.same_scheme(f->get_output_partial_shape(0))) << "Shape " << orig_shape << " is not equal to " << f->get_output_partial_shape(0); +} + +INSTANTIATE_TEST_CASE_P(ConvertConvolution, ConvertConvolutionsTest, + testing::Values(std::make_tuple(InputShape{DYN, DYN, DYN, DYN, DYN}, WeightsShape{8, 3, 1, 2, 3}), + std::make_tuple(InputShape{DYN, 3, 64, 64, 64}, WeightsShape{8, 3, 1, 2, 3}), + std::make_tuple(InputShape{2, DYN, 64, 64, 64}, WeightsShape{9, 3, 2, 3, 1}), + std::make_tuple(InputShape{3, 3, DYN, 64, 64}, WeightsShape{6, 3, 3, 4, 2}), + std::make_tuple(InputShape{3, 3, 64, DYN, 64}, WeightsShape{5, 3, 3, 4, 3}), + std::make_tuple(InputShape{3, 3, 64, 64, DYN}, WeightsShape{5, 3, 3, 4, 3}), + std::make_tuple(InputShape{1, 3, 64, 64}, WeightsShape{6, 3, 1, 1}), + std::make_tuple(InputShape{DYN, DYN, DYN, DYN}, WeightsShape{7, 3, 1, 1}), + std::make_tuple(InputShape{DYN, 3, 64, 64}, WeightsShape{8, 3, 1, 2}), + std::make_tuple(InputShape{2, DYN, 64, 64}, WeightsShape{9, 3, 2, 3}), + std::make_tuple(InputShape{3, 3, DYN, 64}, WeightsShape{6, 3, 3, 4}), + std::make_tuple(InputShape{3, 3, 64, DYN}, WeightsShape{5, 3, 3, 4}), + std::make_tuple(InputShape{DYN, DYN, DYN}, WeightsShape{5, 3, 1}), + std::make_tuple(InputShape{DYN, 3, 10}, 
WeightsShape{3, 3, 1}), + std::make_tuple(InputShape{2, DYN, 9}, WeightsShape{2, 3, 2}), + std::make_tuple(InputShape{3, 3, DYN}, WeightsShape{1, 3, 3}))); diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_shuffle_channels3_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_shuffle_channels3_test.cpp new file mode 100644 index 00000000000..dbeccd5f247 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/convert_shuffle_channels3_test.cpp @@ -0,0 +1,182 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +std::shared_ptr buildInputGraph(int64_t axis, int64_t group, const ::PartialShape& p) { + auto input = std::make_shared<::opset3::Parameter>(::element::f32, p); + auto shuffle_channels = std::make_shared<::opset3::ShuffleChannels>(input, axis, group); + shuffle_channels->set_friendly_name("shc"); + + auto f = std::make_shared<::Function>(::NodeVector{shuffle_channels}, ::ParameterVector{input}); + + ::pass::InitNodeInfo().run_on_function(f); + ::pass::ConvertShuffleChannels3().run_on_function(f); + f->validate_nodes_and_infer_types(); + return f; +} + +TEST(TransformationTests, ConvertShuffleChannelsAxis0) { + int64_t group = 4; + auto ps = ::PartialShape{12, Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic()}; + std::shared_ptr f = buildInputGraph(0, group, ps), f_ref(nullptr); + ASSERT_NO_THROW(check_rt_info(f)); + + auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps); + + auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}); + auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0)); + auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>( + original_shape->output(0), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), + ::opset2::Constant::create(element::i64, Shape({2}), {1, 3})); + + ::OutputVector new_dims = { + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{group}), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1}), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(1), reduce_axis_const, true)}; + + auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0); + auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false); + auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), + ::opset2::Constant::create(element::i64, Shape({3}), + {1, 0, 2})); + auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false); + + f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input}); + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + + auto result_node_of_converted_f = f->get_output_op(0); + auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr(); + ASSERT_TRUE(output_node->get_friendly_name() == "shc") << "ConvertShuffleChannels3 should keep output names.\n"; +} + +TEST(TransformationTests, ConvertShuffleChannelsAxis1) { + int64_t group = 4; + auto ps = ::PartialShape{Dimension::dynamic(), 12, Dimension::dynamic(), Dimension::dynamic()}; + std::shared_ptr f = buildInputGraph(1, 
group, ps), f_ref(nullptr); + ASSERT_NO_THROW(check_rt_info(f)); + + auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps); + + auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}); + auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0)); + auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>( + original_shape->output(0), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), + ::opset2::Constant::create(element::i64, Shape({3}), {1, 1, 2})); + + ::OutputVector new_dims = { + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{group}), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1}), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)}; + + auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0); + auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false); + auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), + ::opset2::Constant::create(element::i64, Shape({4}), + {0, 2, 1, 3})); + auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false); + + f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input}); + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + + auto result_node_of_converted_f = f->get_output_op(0); + auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr(); + ASSERT_TRUE(output_node->get_friendly_name() == "shc") << "ConvertShuffleChannels3 should keep output names.\n"; +} + +TEST(TransformationTests, ConvertShuffleChannelsAxis2) { + int64_t group = 4; + auto ps = ::PartialShape{Dimension::dynamic(), Dimension::dynamic(), 12, Dimension::dynamic()}; + std::shared_ptr f = buildInputGraph(2, group, ps), f_ref(nullptr); + ASSERT_NO_THROW(check_rt_info(f)); + + auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps); + + auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}); + auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0)); + auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>( + original_shape->output(0), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), + ::opset2::Constant::create(element::i64, Shape({3}), {2, 1, 1})); + + ::OutputVector new_dims = { + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{group}), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1}), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)}; + + auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0); + auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false); + auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), + ::opset2::Constant::create(element::i64, Shape({4}), + {0, 2, 1, 3})); + auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false); + + f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input}); + + auto res = 
compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + + auto result_node_of_converted_f = f->get_output_op(0); + auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr(); + ASSERT_TRUE(output_node->get_friendly_name() == "shc") << "ConvertShuffleChannels3 should keep output names.\n"; +} + +TEST(TransformationTests, ConvertShuffleChannelsLastAxis) { + int64_t group = 4; + auto ps = ::PartialShape{Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), 12}; + std::shared_ptr f = buildInputGraph(-1, group, ps), f_ref(nullptr); + ASSERT_NO_THROW(check_rt_info(f)); + + auto input = std::make_shared<::opset3::Parameter>(::element::f32, ps); + + auto reduce_axis_const = ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}); + auto original_shape = std::make_shared<::opset2::ShapeOf>(input->output(0)); + auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>( + original_shape->output(0), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), + ::opset2::Constant::create(element::i64, Shape({2}), {3, 1})); + + ::OutputVector new_dims = { + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{group}), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1})}; + + auto new_shape = std::make_shared<::opset2::Concat>(new_dims, 0); + auto reshape = std::make_shared<::opset2::Reshape>(input->output(0), new_shape, false); + auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), + ::opset2::Constant::create(element::i64, Shape({3}), + {0, 2, 1})); + auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false); + + f_ref = std::make_shared<::Function>(::NodeVector{reshape_back}, ::ParameterVector{input}); + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + + auto result_node_of_converted_f = f->get_output_op(0); + auto output_node = result_node_of_converted_f->input(0).get_source_output().get_node_shared_ptr(); + ASSERT_TRUE(output_node->get_friendly_name() == "shc") << "ConvertShuffleChannels3 should keep output names.\n"; +} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.cpp b/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.cpp index 42194e7a496..3d432f8c9cb 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.cpp @@ -8,6 +8,7 @@ #include #include +#include std::pair compare_functions(const std::shared_ptr & f1, const std::shared_ptr & f2) { /* @@ -27,6 +28,8 @@ std::pair compare_functions(const std::shared_ptr, std::shared_ptr > > q; q.push({f1_results[0], f2_results[0]}); while (!q.empty()) { @@ -42,17 +45,29 @@ std::pair compare_functions(const std::shared_ptrinputs().size()) + " and " + std::to_string(node2->inputs().size())}; } + if (node1->outputs().size() != node2->outputs().size()) { + return {false, "Number of outputs is different: " + std::to_string(node1->outputs().size()) + " and " + std::to_string(node2->outputs().size())}; + } + for (int i = 0; i < node1->inputs().size(); ++i) { - if (!node1->input(i).get_partial_shape().compatible(node2->input(i).get_partial_shape())) { - std::ostringstream 
out("Different shape detected"); - out << node1->input(i).get_partial_shape() << " and " << node2->input(i).get_partial_shape(); - return {false, out.str()}; + if (!node1->input(i).get_partial_shape().same_scheme(node2->input(i).get_partial_shape())) { + err_log << "Different shape detected" << std::endl + << node1->description() << " Input(" << i << ") " << node1->input(i).get_partial_shape() << " and " + << node2->description() << " Input(" << i << ") " << node2->input(i).get_partial_shape() << std::endl; } q.push({node1->input_value(i).get_node_shared_ptr(), node2->input_value(i).get_node_shared_ptr()}); } + + for (int i = 0; i < node1->outputs().size(); ++i) { + if (!node1->output(i).get_partial_shape().same_scheme(node2->output(i).get_partial_shape())) { + err_log << "Different shape detected" << std::endl + << node1->description() << " Output(" << i << ") " << node1->output(i).get_partial_shape() << " and " + << node2->description() << " Output(" << i << ") " << node2->output(i).get_partial_shape() << std::endl; + } + } } - return {true, ""}; + return {err_log.str().empty(), err_log.str()}; } void check_rt_info(const std::shared_ptr & f) { @@ -74,4 +89,9 @@ void check_rt_info(const std::shared_ptr & f) { if (!err_msg.empty()) { throw ngraph::ngraph_error(err_msg); } +} + +void visualize_function(std::shared_ptr f, const std::string & file_name) { + std::vector > g{f}; + ngraph::pass::VisualizeTree(file_name).run_on_module(g); } \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.hpp b/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.hpp index 01a9ee266cd..a1845f99ddc 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.hpp +++ b/inference-engine/tests/functional/inference_engine/transformations/ngraph_test_utils.hpp @@ -17,4 +17,6 @@ using TransformationTests = CommonTestUtils::TestsCommon; std::pair compare_functions(const std::shared_ptr & f1, const std::shared_ptr & f2); -void check_rt_info(const std::shared_ptr & f); \ No newline at end of file +void check_rt_info(const std::shared_ptr & f); + +void visualize_function(std::shared_ptr f, const std::string & file_name); \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/transformations/conv_bias_fusion.cpp b/inference-engine/tests/functional/inference_engine/transformations/primitives_priority_test.cpp similarity index 98% rename from inference-engine/tests/functional/inference_engine/transformations/conv_bias_fusion.cpp rename to inference-engine/tests/functional/inference_engine/transformations/primitives_priority_test.cpp index c89a7fdf33f..4c3c9e0e28f 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/conv_bias_fusion.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/primitives_priority_test.cpp @@ -45,7 +45,7 @@ TEST(TransformationTests, ConvBiasFusion) { InferenceEngine::CNNNetwork network(f); - // Set PrimitivesPriority to all Convolutinos + // Set PrimitivesPriority to all Convolutions auto nGraph = network.getFunction(); ASSERT_TRUE(nGraph); for (auto & op : nGraph->get_ops()) { diff --git a/inference-engine/tests/functional/inference_engine/transformations/transpose_to_reshape_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/transpose_to_reshape_test.cpp index 7028797c313..2952e5cd492 100644 --- 
a/inference-engine/tests/functional/inference_engine/transformations/transpose_to_reshape_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/transpose_to_reshape_test.cpp @@ -98,6 +98,7 @@ private: TEST_P(TransposeToReshapeTests, CompareFunctions) { ngraph::pass::InitNodeInfo().run_on_function(f); ngraph::pass::AlgebraicSimplification().run_on_function(f); + f->validate_nodes_and_infer_types(); ASSERT_NO_THROW(check_rt_info(f)); auto res = compare_functions(f, f_ref); ASSERT_TRUE(res.first) << res.second; @@ -125,7 +126,7 @@ INSTANTIATE_TEST_CASE_P(ReshapeWithConstant, TransposeToReshapeTests, testing::Values(std::make_tuple(InputShape{1, 3, 64, 1}, TransposeOrder{0, 1, 3, 2}, ReferenceParams({1, 3, 1, 64})), std::make_tuple(InputShape{1, 3, 1, 64}, TransposeOrder{1, 0, 3, 2}, ReferenceParams({3, 1, 64, 1})), std::make_tuple(InputShape{DYN, DYN, 1}, TransposeOrder{0, 2, 1}, ReferenceParams({0, 1, -1})), - std::make_tuple(InputShape{1, 1, DYN}, TransposeOrder{2, 1, 0}, ReferenceParams({-1, 1, 1})), + std::make_tuple(InputShape{1, 1, DYN}, TransposeOrder{2, 1, 0}, ReferenceParams({-1, 0, 1})), std::make_tuple(InputShape{DYN, 1, 64, 1}, TransposeOrder{1, 0, 3, 2}, ReferenceParams({1, -1, 1, 64})))); INSTANTIATE_TEST_CASE_P(ReshapeWithGather, TransposeToReshapeTests, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp new file mode 100644 index 00000000000..46f74faec2c --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/add_output.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "behavior/add_output.hpp" +#include "functional_test_utils/test_model/test_model.hpp" +#include "functional_test_utils/plugin_cache.hpp" + +InferenceEngine::CNNNetwork getTargetNetwork() { + auto model = FuncTestUtils::TestModel::getModelWithMemory(InferenceEngine::Precision::FP32); + auto ie = PluginCache::get().ie(); + return ie->ReadNetwork(model.model_xml_str, model.weights_blob); +} + +addOutputsParams testCases[] = {addOutputsParams(getTargetNetwork(), {"Memory_1"}, CommonTestUtils::DEVICE_CPU)}; + +INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputsTest, ::testing::ValuesIn(testCases), + AddOutputsTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_preprocess.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_preprocess.cpp new file mode 100644 index 00000000000..0946bba776d --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/set_preprocess.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "multi-device/multi_device_config.hpp" + +#include "behavior/set_preprocess.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 + }; + + const std::vector> configs = { + {}, + {{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, + {{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, "0"}, {InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, "1"}} + }; + + const std::vector> multiConfigs = { + {{ 
InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} + }; + + INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreProcessTests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(configs)), + PreProcessTests::getTestCaseName); + + INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreProcessTests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multiConfigs)), + PreProcessTests::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/other_tests/add_output.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/other_tests/add_output.cpp deleted file mode 100644 index 008d7588194..00000000000 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/other_tests/add_output.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -#include "other/add_output.hpp" - -const auto addOutputParams = - ::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_CPU)); - -INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams, - AddOutputTestsCommonClass::getTestCaseName); - -TEST_P(AddOutputTestsCommonClass, basic) { - run_test(); -} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp new file mode 100644 index 00000000000..9ef51d02473 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -0,0 +1,111 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/convolution_backprop_data.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector numOutChannels = {1, 5, 16}; + +/* ============= 2D ConvolutionBackpropData ============= */ +const std::vector> inputShapes2D = {{1, 3, 30, 30}, + {1, 16, 10, 10}, + {1, 32, 10, 10}}; +const std::vector> kernels2D = {{1, 1}, {3, 3}, {3, 5}}; +const std::vector> strides2D = {{1, 1}, {1, 3}}; +const std::vector> padBegins2D = {{0, 0}}; +const std::vector> padEnds2D = {{0, 0}, {1, 1}}; +const std::vector> dilations2D = {{1, 1}, {2, 2}}; + +const auto conv2DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); +const auto conv2DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID) +); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_ExplicitPadding, 
ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv2DParams_ExplicitPadding, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_AutoPadValid, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +/* ============= 3D ConvolutionBackpropData ============= */ +const std::vector> inputShapes3D = {{1, 3, 10, 10, 10}, + {1, 16, 5, 5, 5}, + {1, 32, 5, 5, 5}}; +const std::vector> kernels3D = {{1, 1, 1}, {3, 3, 3}}; +const std::vector> strides3D = {{1, 1, 1}}; +const std::vector> padBegins3D = {{0, 0, 0}}; +const std::vector> padEnds3D = {{0, 0, 0}, {1, 1, 1}}; +const std::vector> dilations3D = {{1, 1, 1}, {2, 2, 2}}; + +const auto conv3DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); +const auto conv3DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID) +); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv3DParams_ExplicitPadding, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv3DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp new file mode 100644 index 00000000000..2c88ef93189 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/cum_sum.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/cum_sum.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +const std::vector> shapes = { + {16}, + {9, 15}, + {16, 10, 12}, + {5, 14, 5, 7}, + {7, 8, 6, 7, 13} +}; + +const std::vector inputPrecision = { + InferenceEngine::Precision::I8, + InferenceEngine::Precision::U8, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::I32, + InferenceEngine::Precision::FP32 +}; + +const std::vector axes = { 0, 1, 2, 3, 4 }; +const std::vector negativeAxes = { -1, -2, -3, -4, -5 }; + +const std::vector exclusive = {true, false}; +const std::vector reverse = {true, false}; + 
+const auto testCasesNegativeAxis = ::testing::Combine( + ::testing::Values(std::vector{4, 16, 3, 6, 5}), + ::testing::Values(InferenceEngine::Precision::FP32), + ::testing::ValuesIn(negativeAxes), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCasesAxis_0 = ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[0]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCasesAxis_1 = ::testing::Combine( + ::testing::ValuesIn(std::vector>(shapes.begin() + 1, shapes.end())), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[1]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCasesAxis_2 = ::testing::Combine( + ::testing::ValuesIn(std::vector>(shapes.begin() + 2, shapes.end())), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[2]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCasesAxis_3 = ::testing::Combine( + ::testing::ValuesIn(std::vector>(shapes.begin() + 3, shapes.end())), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[3]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +const auto testCasesAxis_4 = ::testing::Combine( + ::testing::ValuesIn(std::vector>(shapes.begin() + 4, shapes.end())), + ::testing::ValuesIn(inputPrecision), + ::testing::Values(axes[4]), + ::testing::ValuesIn(exclusive), + ::testing::ValuesIn(reverse), + ::testing::Values(CommonTestUtils::DEVICE_CPU) +); + +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_negative_axis, CumSumLayerTest, testCasesNegativeAxis, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_0, CumSumLayerTest, testCasesAxis_0, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_1, CumSumLayerTest, testCasesAxis_1, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_2, CumSumLayerTest, testCasesAxis_2, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_3, CumSumLayerTest, testCasesAxis_3, CumSumLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_MKLDNN_TestsCumSum_axis_4, CumSumLayerTest, testCasesAxis_4, CumSumLayerTest::getTestCaseName); + diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp new file mode 100644 index 00000000000..bd1b411250d --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/eltwise.cpp @@ -0,0 +1,56 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// +// NOTE: WILL BE REWORKED (31905) + +#include + +#include + +#include "common_test_utils/common_layers_params.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_common.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/xml_net_builder/ir_net.hpp" +#include "common_test_utils/xml_net_builder/xml_filler.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include 
"ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ie_core.hpp" +#include "single_layer_tests/eltwise.hpp" + +using namespace EltwiseTestNamespace; + +std::vector operations = { EltwiseOpType::ADD, EltwiseOpType::SUBSTRACT, EltwiseOpType::MULTIPLY }; +std::vector primary_input_idx = { 0, 1 }; +std::vector secondary_input_types = { InputLayerType::CONSTANT , InputLayerType::PARAMETER }; +std::vector net_precisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 }; +std::vector flat_shapes = { {1, 200}, {1, 2000}, {1, 20000} }; +std::vector non_flat_shapes = { {2, 200}, {10, 200}, {1, 10, 100}, {4, 4, 16} }; +std::map additional_config = {}; + +const auto FlatEltwiseParams = +::testing::Combine( + ::testing::ValuesIn(operations), + ::testing::ValuesIn(primary_input_idx), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(net_precisions), + ::testing::ValuesIn(flat_shapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(additional_config)); + +const auto NonFlatEltwiseParams = +::testing::Combine( + ::testing::ValuesIn(operations), + ::testing::ValuesIn(primary_input_idx), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(net_precisions), + ::testing::ValuesIn(non_flat_shapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(additional_config)); + +INSTANTIATE_TEST_CASE_P(Eltwise_flat, EltwiseLayerTest, FlatEltwiseParams, + EltwiseLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(Eltwise_non_flat, EltwiseLayerTest, NonFlatEltwiseParams, + EltwiseLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp new file mode 100644 index 00000000000..d4f5e4117a0 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/fake_quantize.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6}}; +const std::vector> constShapes = {{1}}; +const std::vector levels = {16, 255, 256}; + +const auto fqParams = ::testing::Combine( + ::testing::ValuesIn(levels), + ::testing::ValuesIn(constShapes) +); + +INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest, + ::testing::Combine( + fqParams, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + FakeQuantizeLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp new file mode 100644 index 00000000000..275a506d2b9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/group_convolution_backprop_data.cpp @@ -0,0 +1,113 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + 
+#include + +#include "single_layer_tests/group_convolution_backprop_data.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 +}; + +const std::vector numOutChannels = {16, 32}; +const std::vector numGroups = {2, 8, 16}; + +/* ============= 2D GroupConvolution ============= */ +const std::vector> inputShapes2D = {{1, 16, 10, 10}, + {1, 32, 10, 10}}; +const std::vector> kernels2D = {{1, 1}, {3, 3}}; +const std::vector> strides2D = {{1, 1}}; +const std::vector> padBegins2D = {{0, 0}}; +const std::vector> padEnds2D = {{0, 0}}; +const std::vector> dilations2D = {{1, 1}}; + +const auto groupConvBackpropData2DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); +const auto groupConvBackpropData2DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::VALID) +); + +INSTANTIATE_TEST_CASE_P(GroupConvBackpropData2D_ExplicitPadding, GroupConvBackpropDataLayerTest, + ::testing::Combine( + groupConvBackpropData2DParams_ExplicitPadding, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvBackpropDataLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(GroupConvBackpropData2D_AutoPadValid, GroupConvBackpropDataLayerTest, + ::testing::Combine( + groupConvBackpropData2DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvBackpropDataLayerTest::getTestCaseName); + +/* ============= 3D GroupConvolution ============= */ +const std::vector> inputShapes3D = {{1, 16, 5, 5, 5}, + {1, 32, 5, 5, 5}}; +const std::vector> kernels3D = {{1, 1, 1}, {3, 3, 3}}; +const std::vector> strides3D = {{1, 1, 1}}; +const std::vector> padBegins3D = {{0, 0, 0}}; +const std::vector> padEnds3D = {{0, 0, 0}}; +const std::vector> dilations3D = {{1, 1, 1}}; + +const auto groupConvBackpropData3DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); +const auto groupConvBackpropData3DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::VALID) +); + +INSTANTIATE_TEST_CASE_P(GroupConvBackpropData3D_ExplicitPadding, GroupConvBackpropDataLayerTest, + ::testing::Combine( + groupConvBackpropData3DParams_ExplicitPadding, + ::testing::ValuesIn(netPrecisions), + 
::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvBackpropDataLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(GroupConvBackpropData3D_AutoPadValid, GroupConvBackpropDataLayerTest, + ::testing::Combine( + groupConvBackpropData3DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + GroupConvBackpropDataLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 536f0bb224a..a0e85efd686 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -10,6 +10,16 @@ std::vector disabledTestPatterns() { return { // TODO: Issue 26264 - R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=CEIL.*)" + R"(.*(MaxPool|AvgPool).*S\(1\.2\).*Rounding=CEIL.*)", + // TODO: Issue 31839 + R"(.*(QuantConvBackpropData3D).*)", + // TODO: Issue 31841 + R"(.*(QuantGroupConvBackpropData3D).*)", + // TODO: Issue 31843 + R"(.*(QuantGroupConvBackpropData2D)*QG=Perchannel.*)", + // TODO: Issue 32023 + R"(.*(QuantGroupConvBackpropData2D)*QG=Pertensor.*)", + // TODO: Issue 31845 + R"(.*(FakeQuantize).*)" }; } \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp new file mode 100644 index 00000000000..cd1c42caccd --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_convolution_backprop_data.cpp @@ -0,0 +1,82 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/quantized_convolution_backprop_data.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace ngraph::helpers; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 +}; + +const std::vector numOutChannels = {16, 32}; + +const std::vector levels = {256}; +// FIXME: Perchannel tests fail because of bug in LPT +const std::vector granularity = {Pertensor, Perchannel}; + +/* ============= 2D GroupConvolutionBackpropData ============= */ +const std::vector> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}}; +const std::vector> kernels2D = {{1, 1}, {3, 3}}; +const std::vector> strides2D = {{1, 1}}; +const std::vector> padBegins2D = {{0, 0}}; +const std::vector> padEnds2D = {{0, 0}}; +const std::vector> dilations2D = {{1, 1}}; + + +const auto quantConvBackpropData2DParams = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::AUTO), + ::testing::ValuesIn(levels), + ::testing::ValuesIn(granularity) +); + +INSTANTIATE_TEST_CASE_P(QuantConvBackpropData2D, QuantConvBackpropDataLayerTest, + ::testing::Combine( + quantConvBackpropData2DParams, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes2D), + 
::testing::Values(CommonTestUtils::DEVICE_CPU)), + QuantConvBackpropDataLayerTest::getTestCaseName); + +/* ============= 3D ConvolutionBackpropData ============= */ +const std::vector> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 5, 5}}; +const std::vector> kernels3D = {{1, 1, 1}, {3, 3, 3}}; +const std::vector> strides3D = {{1, 1, 1}}; +const std::vector> padBegins3D = {{0, 0, 0}}; +const std::vector> padEnds3D = {{0, 0, 0}}; +const std::vector> dilations3D = {{1, 1, 1}}; + +const auto quantConvBackpropData3DParams = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::AUTO), + ::testing::ValuesIn(levels), + ::testing::ValuesIn(granularity) +); + +INSTANTIATE_TEST_CASE_P(QuantConvBackpropData3D, QuantConvBackpropDataLayerTest, + ::testing::Combine( + quantConvBackpropData3DParams, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + QuantConvBackpropDataLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp new file mode 100644 index 00000000000..0caf6ddf7fe --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_group_convolution_backprop_data.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "subgraph_tests/quantized_group_convolution_backprop_data.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; +using namespace ngraph::helpers; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32 +}; + +const std::vector numOutChannels = {16, 32}; +const std::vector numGroups = {2, 8, 16}; + +const std::vector levels = {256}; +const std::vector granularity = {Pertensor, Perchannel}; + +/* ============= 2D GroupConvolutionBackpropData ============= */ +const std::vector> inputShapes2D = {{1, 16, 10, 10}, {1, 32, 10, 10}}; +const std::vector> kernels2D = {{1, 1}, {3, 3}}; +const std::vector> strides2D = {{1, 1}}; +const std::vector> padBegins2D = {{0, 0}}; +const std::vector> padEnds2D = {{0, 0}}; +const std::vector> dilations2D = {{1, 1}}; + + +const auto quantGroupConvBackpropData2DParams = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::AUTO), + ::testing::ValuesIn(levels), + ::testing::ValuesIn(granularity) +); + +INSTANTIATE_TEST_CASE_P(QuantGroupConvBackpropData2D, QuantGroupConvBackpropDataLayerTest, + ::testing::Combine( + quantGroupConvBackpropData2DParams, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + QuantGroupConvBackpropDataLayerTest::getTestCaseName); + +/* ============= 3D GroupConvolutionBackpropData ============= */ +const std::vector> inputShapes3D = {{1, 16, 5, 5, 5}, {1, 32, 5, 
5, 5}}; +const std::vector> kernels3D = {{3, 3, 3}}; +const std::vector> strides3D = {{1, 1, 1}}; +const std::vector> padBegins3D = {{0, 0, 0}}; +const std::vector> padEnds3D = {{0, 0, 0}}; +const std::vector> dilations3D = {{1, 1, 1}}; + +const auto quantGroupConvBackpropData3DParams = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::ValuesIn(numGroups), + ::testing::Values(ngraph::op::PadType::AUTO), + ::testing::ValuesIn(levels), + ::testing::ValuesIn(granularity) +); + +INSTANTIATE_TEST_CASE_P(QuantGroupConvBackpropData3D, QuantGroupConvBackpropDataLayerTest, + ::testing::Combine( + quantGroupConvBackpropData3DParams, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + QuantGroupConvBackpropDataLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/add_output.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/add_output.cpp new file mode 100644 index 00000000000..2ff8dbbf543 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/behavior/add_output.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "behavior/add_output.hpp" +#include "functional_test_utils/test_model/test_model.hpp" +#include "functional_test_utils/plugin_cache.hpp" + +InferenceEngine::CNNNetwork getTargetNetwork() { + auto model = FuncTestUtils::TestModel::getModelWithMemory(InferenceEngine::Precision::FP32); + auto ie = PluginCache::get().ie(); + return ie->ReadNetwork(model.model_xml_str, model.weights_blob); +} +addOutputsParams testCases[] = {addOutputsParams(getTargetNetwork(), {"Memory_1"}, CommonTestUtils::DEVICE_GNA)}; + +INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputsTest, ::testing::ValuesIn(testCases), AddOutputsTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/other_tests/add_output.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/other_tests/add_output.cpp deleted file mode 100644 index b98bb423a6d..00000000000 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/other_tests/add_output.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "other/add_output.hpp" - -const auto addOutputParams = - ::testing::Combine(::testing::Values("Memory_1"), ::testing::Values(CommonTestUtils::DEVICE_GNA)); - -INSTANTIATE_TEST_CASE_P(AddOutputBasic, AddOutputTestsCommonClass, addOutputParams, - AddOutputTestsCommonClass::getTestCaseName); - -TEST_P(AddOutputTestsCommonClass, basic) { - run_test(); -} diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/eltwise.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/eltwise.cpp new file mode 100644 index 00000000000..bd63495af04 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/eltwise.cpp @@ -0,0 +1,57 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// +// NOTE: WILL BE REWORKED (31905) + +#include + +#include + +#include 
"common_test_utils/common_layers_params.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_common.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/xml_net_builder/ir_net.hpp" +#include "common_test_utils/xml_net_builder/xml_filler.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ie_core.hpp" +#include "single_layer_tests/eltwise.hpp" + +using namespace EltwiseTestNamespace; + +std::vector operations = { EltwiseOpType::ADD, EltwiseOpType::SUBSTRACT, EltwiseOpType::MULTIPLY }; +std::vector primary_input_idx = { 0, 1 }; +std::vector secondary_input_types = { InputLayerType::CONSTANT , InputLayerType::PARAMETER }; +std::vector net_precisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 }; +std::vector flat_shapes = { {1, 200}, {1, 2000}, {1, 20000} }; +std::vector non_flat_shapes = { {2, 200}, {10, 200}, {1, 10, 100}, {4, 4, 16} }; +std::map additional_config = { {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "1638.4"}, {"GNA_SCALE_FACTOR_1", "1638.4"} }; + +const auto FlatEltwiseParams = +::testing::Combine( + ::testing::ValuesIn(operations), + ::testing::ValuesIn(primary_input_idx), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(net_precisions), + ::testing::ValuesIn(flat_shapes), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::Values(additional_config)); + +const auto NonFlatEltwiseParams = +::testing::Combine( + ::testing::ValuesIn(operations), + ::testing::ValuesIn(primary_input_idx), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(net_precisions), + ::testing::ValuesIn(non_flat_shapes), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::Values(additional_config)); + +INSTANTIATE_TEST_CASE_P(Eltwise_flat, EltwiseLayerTest, FlatEltwiseParams, + EltwiseLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(DISABLED_Eltwise_non_flat, EltwiseLayerTest, NonFlatEltwiseParams, + EltwiseLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index 43a2a659d85..c17bf7445ce 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -12,4 +12,4 @@ std::vector disabledTestPatterns() { // TODO: FIX BUG 31661 ".*Behavior.*Callback.*" }; -} \ No newline at end of file +} diff --git a/inference-engine/tests/functional/plugin/gpu/behavior/core_threading_tests.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_threading_tests.cpp similarity index 100% rename from inference-engine/tests/functional/plugin/gpu/behavior/core_threading_tests.cpp rename to inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_threading_tests.cpp diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/set_preprocess.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/set_preprocess.cpp new file mode 100644 index 00000000000..411861cd098 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/set_preprocess.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2018-2020 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "multi-device/multi_device_config.hpp" + +#include "behavior/set_preprocess.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 + }; + + const std::vector> configs = { + {}, + }; + + const std::vector> multiConfigs = { + {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}} + }; + + INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreProcessTests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(configs)), + PreProcessTests::getTestCaseName); + + INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreProcessTests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multiConfigs)), + PreProcessTests::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp new file mode 100644 index 00000000000..2e19bec73bb --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convolution_backprop_data.cpp @@ -0,0 +1,119 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/convolution_backprop_data.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector numOutChannels = {1, 5, 16}; + +/* ============= 2D ConvolutionBackpropData ============= */ +const std::vector netPrecisions2D = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> inputShapes2D = {{1, 3, 30, 30}, + {1, 16, 10, 10}, + {1, 32, 10, 10}}; +const std::vector> kernels2D = {{1, 1}, {3, 3}, {3, 5}}; +const std::vector> strides2D = {{1, 3}}; +const std::vector> padBegins2D = {{0, 0}}; +const std::vector> padEnds2D = {{0, 0}, {1, 1}}; +const std::vector> dilations2D = {{1, 1}}; + +const auto conv2DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::ValuesIn(padBegins2D), + ::testing::ValuesIn(padEnds2D), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); +const auto conv2DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels2D), + ::testing::ValuesIn(strides2D), + ::testing::Values(std::vector({0, 0})), + ::testing::Values(std::vector({0, 0})), + ::testing::ValuesIn(dilations2D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID) +); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_ExplicitPadding, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv2DParams_ExplicitPadding, + ::testing::ValuesIn(netPrecisions2D), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData2D_AutoPadValid, 
ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv2DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions2D), + ::testing::ValuesIn(inputShapes2D), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +/* ============= 3D ConvolutionBackpropData ============= */ +const std::vector netPrecisions3D = { + InferenceEngine::Precision::FP32, +}; +const std::vector> inputShapes3D = {{1, 3, 10, 10, 10}, + {1, 16, 5, 5, 5}, + {1, 32, 5, 5, 5}}; +const std::vector> kernels3D = {{1, 1, 1}, {3, 3, 3}}; +const std::vector> strides3D = {{1, 1, 1}}; +const std::vector> padBegins3D = {{0, 0, 0}}; +const std::vector> padEnds3D = {{0, 0, 0}, {1, 1, 1}}; +const std::vector> dilations3D = {{1, 1, 1}}; + +const auto conv3DParams_ExplicitPadding = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::ValuesIn(padBegins3D), + ::testing::ValuesIn(padEnds3D), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::EXPLICIT) +); +const auto conv3DParams_AutoPadValid = ::testing::Combine( + ::testing::ValuesIn(kernels3D), + ::testing::ValuesIn(strides3D), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::Values(std::vector({0, 0, 0})), + ::testing::ValuesIn(dilations3D), + ::testing::ValuesIn(numOutChannels), + ::testing::Values(ngraph::op::PadType::VALID) +); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_ExplicitPadding, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv3DParams_ExplicitPadding, + ::testing::ValuesIn(netPrecisions3D), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(ConvolutionBackpropData3D_AutoPadValid, ConvolutionBackpropDataLayerTest, + ::testing::Combine( + conv3DParams_AutoPadValid, + ::testing::ValuesIn(netPrecisions3D), + ::testing::ValuesIn(inputShapes3D), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + ConvolutionBackpropDataLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/eltwise.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/eltwise.cpp new file mode 100644 index 00000000000..f9688395115 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/eltwise.cpp @@ -0,0 +1,56 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// +// NOTE: WILL BE REWORKED (31905) + +#include + +#include + +#include "common_test_utils/common_layers_params.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_common.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/xml_net_builder/ir_net.hpp" +#include "common_test_utils/xml_net_builder/xml_filler.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ie_core.hpp" +#include "single_layer_tests/eltwise.hpp" + +using namespace EltwiseTestNamespace; + +std::vector operations = { EltwiseOpType::ADD, EltwiseOpType::SUBSTRACT, EltwiseOpType::MULTIPLY }; +std::vector primary_input_idx = { 0, 1 }; +std::vector secondary_input_types = { InputLayerType::CONSTANT , InputLayerType::PARAMETER }; +std::vector net_precisions = { 
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 }; +std::vector flat_shapes = { {1, 200}, {1, 2000}, {1, 20000} }; +std::vector non_flat_shapes = { {2, 200}, {10, 200}, {1, 10, 100}, {4, 4, 16} }; +std::map additional_config = {}; + +const auto FlatEltwiseParams = +::testing::Combine( + ::testing::ValuesIn(operations), + ::testing::ValuesIn(primary_input_idx), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(net_precisions), + ::testing::ValuesIn(flat_shapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(additional_config)); + +const auto NonFlatEltwiseParams = +::testing::Combine( + ::testing::ValuesIn(operations), + ::testing::ValuesIn(primary_input_idx), + ::testing::ValuesIn(secondary_input_types), + ::testing::ValuesIn(net_precisions), + ::testing::ValuesIn(non_flat_shapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::Values(additional_config)); + +INSTANTIATE_TEST_CASE_P(Eltwise_flat, EltwiseLayerTest, FlatEltwiseParams, + EltwiseLayerTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(Eltwise_non_flat, EltwiseLayerTest, NonFlatEltwiseParams, + EltwiseLayerTest::getTestCaseName); diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/set_preprocess.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/set_preprocess.cpp new file mode 100644 index 00000000000..7df275ae6a5 --- /dev/null +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/set_preprocess.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "multi-device/multi_device_config.hpp" + +#include "behavior/set_preprocess.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + const std::vector netPrecisions = { + InferenceEngine::Precision::FP16 + }; + + const std::vector> configs = { + {}, + }; + + const std::vector> multiConfigs = { + {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_MYRIAD}} + }; + + INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, PreProcessTests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), + ::testing::ValuesIn(configs)), + PreProcessTests::getTestCaseName); + + INSTANTIATE_TEST_CASE_P(smoke_Multi_BehaviorTests, PreProcessTests, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multiConfigs)), + PreProcessTests::getTestCaseName); +} // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/equal.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/equal.cpp new file mode 100644 index 00000000000..77ac1e42b23 --- /dev/null +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/equal.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/equal.hpp" + +#include "common_test_utils/test_constants.hpp" + +#include + +using namespace LayerTestsDefinitions; + +namespace { + +std::vector> inShapes = { + {{200}, {200}}, + {{1000}, {1}}, + {{1, 256, 512}, {1, 256, 512}}, + {{1}, {1, 256, 512}}, +}; + +INSTANTIATE_TEST_CASE_P(equalS32, EqualLayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes), + 
::testing::Values(InferenceEngine::Precision::I32), + ::testing::Values(InferenceEngine::Precision::I32), + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), + EqualLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/greater.cpp b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/greater.cpp new file mode 100644 index 00000000000..7abfa6fead2 --- /dev/null +++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/single_layer_tests/greater.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/greater.hpp" + +#include "common_test_utils/test_constants.hpp" + +#include + +using namespace LayerTestsDefinitions; + +namespace { + +std::vector> inShapes = { + {{200}, {200}}, + {{1000}, {1}}, + {{1, 256, 512}, {1, 256, 512}}, + {{1}, {1, 256, 512}}, +}; + +INSTANTIATE_TEST_CASE_P(greaterS32, GreaterLayerTest, + ::testing::Combine( + ::testing::ValuesIn(inShapes), + ::testing::Values(InferenceEngine::Precision::FP16), + ::testing::Values(InferenceEngine::Precision::I32), + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), + GreaterLayerTest::getTestCaseName); + +} // namespace diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/add_output.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/add_output.hpp new file mode 100644 index 00000000000..81a5770d804 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/add_output.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + + +#include "common_test_utils/test_common.hpp" +#include + +typedef std::tuple< + InferenceEngine::CNNNetwork, // CNNNetwork to work with + std::vector, // Target layers to add as outputs + std::string> // Target device name + addOutputsParams; + +class AddOutputsTest : public CommonTestUtils::TestsCommon, + public testing::WithParamInterface { +protected: + InferenceEngine::CNNNetwork net; + std::vector outputsToAdd; + std::string deviceName; + + void SetUp(); +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); +}; diff --git a/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp new file mode 100644 index 00000000000..4c3402d6166 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/set_preprocess.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include "ie_extension.h" +#include +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + typedef std::tuple< + InferenceEngine::Precision, // Network precision + std::string, // Device name + std::map // Config + > PreProcessParams; + +class PreProcessTests : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + void TearDown() override; +}; + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git 
a/inference-engine/tests/functional/plugin/shared/include/other/add_output.hpp b/inference-engine/tests/functional/plugin/shared/include/other/add_output.hpp deleted file mode 100644 index b1fc8d20104..00000000000 --- a/inference-engine/tests/functional/plugin/shared/include/other/add_output.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include - -#include "common_test_utils/common_layers_params.hpp" -#include "common_test_utils/common_utils.hpp" -#include "common_test_utils/test_common.hpp" -#include "common_test_utils/test_constants.hpp" -#include "common_test_utils/xml_net_builder/ir_net.hpp" -#include "common_test_utils/xml_net_builder/xml_filler.hpp" -#include "ie_core.hpp" - -class AddOutputTestsCommonClass : public CommonTestUtils::TestsCommon, - public testing::WithParamInterface> { -private: - static std::string generate_model(); - -public: - static std::string getTestCaseName(testing::TestParamInfo> obj); - void run_test(); -}; diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp new file mode 100644 index 00000000000..4b8940bae76 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/convolution_backprop_data.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +typedef std::tuple< + InferenceEngine::SizeVector, // Kernel size + InferenceEngine::SizeVector, // Strides + std::vector, // Pad begin + std::vector, // Pad end + InferenceEngine::SizeVector, // Dilation + size_t, // Num out channels + ngraph::op::PadType // Padding type +> convBackpropDataSpecificParams; +typedef std::tuple< + convBackpropDataSpecificParams, + InferenceEngine::Precision, // Net precision + InferenceEngine::SizeVector, // Input shapes + LayerTestsUtils::TargetDevice // Device name +> convBackpropDataLayerTestParamsSet; +namespace LayerTestsDefinitions { + + +class ConvolutionBackpropDataLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/cum_sum.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/cum_sum.hpp new file mode 100644 index 00000000000..161e4ddfa2e --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/cum_sum.hpp @@ -0,0 +1,31 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + InferenceEngine::SizeVector, // Input shapes + InferenceEngine::Precision, // Input precision + int64_t, // Axis + bool, // Exclusive + bool, // Reverse + std::string> cumSumParams; // Device name + +class CumSumLayerTest : public testing::WithParamInterface, public 
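+// cumSumParams adds the two CumSum attributes on top of shape/precision/axis/device:
+// `exclusive` is expected to drop the current element from each partial sum and `reverse`
+// to accumulate from the last element towards the first (standard CumSum semantics
+// assumed here for the ngraph::op::CumSum attributes).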
LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/eltwise.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/eltwise.hpp new file mode 100644 index 00000000000..c7519b8d7f4 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/eltwise.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// +// NOTE: WILL BE REWORKED (31905) + +#include + +#include + +#include "common_test_utils/common_layers_params.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_common.hpp" +#include "common_test_utils/test_constants.hpp" +#include "ie_core.hpp" + +namespace EltwiseTestNamespace { + + using ParameterInputIdx = int; + enum class InputLayerType { + CONSTANT, + PARAMETER + }; + enum class EltwiseOpType { + ADD, + SUBSTRACT, + MULTIPLY + }; + const char* InputLayerType_to_string(InputLayerType lt); + const char* EltwiseOpType_to_string(EltwiseOpType eOp); +}// namespace EltwiseTestNamespace + +typedef std::tuple< + EltwiseTestNamespace::EltwiseOpType, // eltwise op type + EltwiseTestNamespace::ParameterInputIdx, // primary input idx + EltwiseTestNamespace::InputLayerType, // secondary input type + InferenceEngine::Precision, // Net precision + InferenceEngine::SizeVector, // Input shapes + std::string, // Device name + std::map // Additional network configuration +> eltwiseLayerTestParamsSet; + +class EltwiseLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +protected: + void SetUp() override; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +}; diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/equal.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/equal.hpp new file mode 100644 index 00000000000..d2b04edc03e --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/equal.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "functional_test_utils/layer_test_utils.hpp" + +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +#include +#include +#include +#include +#include + +namespace LayerTestsDefinitions { + +using EqualTestParam = typename std::tuple< + std::vector, // Input shapes + InferenceEngine::Precision, // Input precision + InferenceEngine::Precision, // Output precision + LayerTestsUtils::TargetDevice>; // Config + +class EqualLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp new file mode 100644 index 00000000000..32e1816bfdf --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/fake_quantize.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2020 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +typedef std::tuple< + size_t, // levels + std::vector // const inputs shape +> fqSpecificParams; +typedef std::tuple< + fqSpecificParams, + InferenceEngine::Precision, // Net precision + InferenceEngine::SizeVector, // Input shapes + LayerTestsUtils::TargetDevice // Device name +> fqLayerTestParamsSet; +namespace LayerTestsDefinitions { + + +class FakeQuantizeLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/greater.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/greater.hpp new file mode 100644 index 00000000000..4d740ce218d --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/greater.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "functional_test_utils/layer_test_utils.hpp" + +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +#include +#include +#include +#include +#include + +namespace LayerTestsDefinitions { + +using GreaterTestParam = typename std::tuple< + std::vector, // Input shapes + InferenceEngine::Precision, // Input precision + InferenceEngine::Precision, // Output precision + LayerTestsUtils::TargetDevice>; // Config + +class GreaterLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/group_convolution_backprop_data.hpp b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/group_convolution_backprop_data.hpp new file mode 100644 index 00000000000..cfe92d9e039 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/single_layer_tests/group_convolution_backprop_data.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +typedef std::tuple< + InferenceEngine::SizeVector, + InferenceEngine::SizeVector, + std::vector, + std::vector, + InferenceEngine::SizeVector, + size_t, + size_t, + ngraph::op::PadType> groupConvBackpropDataSpecificParams; +typedef std::tuple< + groupConvBackpropDataSpecificParams, + InferenceEngine::Precision, + InferenceEngine::SizeVector, + LayerTestsUtils::TargetDevice> groupConvBackpropDataLayerTestParamsSet; + +namespace LayerTestsDefinitions { + +class GroupConvBackpropDataLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace 
LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/quantized_convolution_backprop_data.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/quantized_convolution_backprop_data.hpp new file mode 100644 index 00000000000..d5b01edc63c --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/quantized_convolution_backprop_data.hpp @@ -0,0 +1,43 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +typedef std::tuple< + InferenceEngine::SizeVector, + InferenceEngine::SizeVector, + std::vector, + std::vector, + InferenceEngine::SizeVector, + size_t, + ngraph::op::PadType, + size_t, + ngraph::helpers::QuantizationGranularity> quantConvBackpropDataSpecificParams; +typedef std::tuple< + quantConvBackpropDataSpecificParams, + InferenceEngine::Precision, + InferenceEngine::SizeVector, + LayerTestsUtils::TargetDevice> quantConvBackpropDataLayerTestParamsSet; + +namespace LayerTestsDefinitions { + +class QuantConvBackpropDataLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/quantized_group_convolution_backprop_data.hpp b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/quantized_group_convolution_backprop_data.hpp new file mode 100644 index 00000000000..99212d9fcdd --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/include/subgraph_tests/quantized_group_convolution_backprop_data.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +typedef std::tuple< + InferenceEngine::SizeVector, + InferenceEngine::SizeVector, + std::vector, + std::vector, + InferenceEngine::SizeVector, + size_t, + size_t, + ngraph::op::PadType, + size_t, + ngraph::helpers::QuantizationGranularity> quantGroupConvBackpropDataSpecificParams; +typedef std::tuple< + quantGroupConvBackpropDataSpecificParams, + InferenceEngine::Precision, + InferenceEngine::SizeVector, + LayerTestsUtils::TargetDevice> quantGroupConvBackpropDataLayerTestParamsSet; + +namespace LayerTestsDefinitions { + +class QuantGroupConvBackpropDataLayerTest : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp new file mode 100644 index 00000000000..df2f09eba51 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/add_output.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2020 Intel 
Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include +#include "behavior/add_output.hpp" +#include "functional_test_utils/plugin_cache.hpp" + +std::string AddOutputsTest::getTestCaseName(const testing::TestParamInfo &obj) { + std::ostringstream results; + InferenceEngine::CNNNetwork net; + std::vector outputsToAdd; + std::string deviceName; + std::tie(net, outputsToAdd, deviceName) = obj.param; + results << "Outputs:" << CommonTestUtils::vec2str(outputsToAdd); + return results.str(); +} + +void AddOutputsTest::SetUp() { + std::tie(net, outputsToAdd, deviceName) = GetParam(); +} + +TEST_P(AddOutputsTest, smoke_CheckOutputExist) { + std::vector expectedOutputs = outputsToAdd; + for (const auto &out : net.getOutputsInfo()) { + expectedOutputs.push_back(out.first); + } + for (const auto &out : outputsToAdd) { + net.addOutput(out); + } + auto ie = PluginCache::get().ie(deviceName); + auto executableNet = ie->LoadNetwork(net, deviceName); + auto outputs = executableNet.GetOutputsInfo(); + + for (const auto &out : expectedOutputs) { + ASSERT_TRUE(outputs.count(out)) << "Layer " << out << " expected to be in network outputs but it's not!"; + } +} \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/infer_request_callback.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/infer_request_callback.cpp index b7a14a54819..544b3729096 100644 --- a/inference-engine/tests/functional/plugin/shared/src/behavior/infer_request_callback.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/infer_request_callback.cpp @@ -290,4 +290,4 @@ TEST_P(CallbackTests, returnGeneralErrorIfCallbackThrowException) { ASSERT_NE(std::string(resp.msg).find("returnGeneralErrorIfCallbackThrowException"), std::string::npos); } -} // namespace LayerTestsDefinitions \ No newline at end of file +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/behavior/set_preprocess.cpp b/inference-engine/tests/functional/plugin/shared/src/behavior/set_preprocess.cpp new file mode 100644 index 00000000000..8eea245b423 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/behavior/set_preprocess.cpp @@ -0,0 +1,102 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include +#include +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ie_preprocess.hpp" +#include "ngraph_functions/pass/convert_prc.hpp" +#include "ngraph_functions/subgraph_builders.hpp" +#include "behavior/set_preprocess.hpp" + +namespace LayerTestsDefinitions { + std::string PreProcessTests::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::tie(netPrecision, targetDevice, configuration) = obj.param; + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + if (!configuration.empty()) { + result << "configItem=" << configuration.begin()->first << "_" << configuration.begin()->second; + } + return result.str(); + } + + void PreProcessTests::SetUp() { + InferenceEngine::Precision netPrecision; + std::tie(netPrecision, targetDevice, configuration) = 
this->GetParam(); + function = ngraph::builder::subgraph::makeConvPoolRelu(); + } + + void PreProcessTests::TearDown() { + if (targetDevice.find(CommonTestUtils::DEVICE_GPU) != std::string::npos) { + PluginCache::get().reset(); + } + } + +TEST_P(PreProcessTests, SetPreProcessToInputInfo) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + // Create CNNNetwork from ngrpah::Function + InferenceEngine::CNNNetwork cnnNet(function); + + auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); + preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR); + + // Get Core from cache + auto ie = PluginCache::get().ie(); + // Load CNNNetwork to target plugins + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration); + // Create InferRequest + auto req = execNet.CreateInferRequest(); + { + InferenceEngine::ConstInputsDataMap inputsMap = execNet.GetInputsInfo(); + const auto& name = inputsMap.begin()->second->name(); + const InferenceEngine::PreProcessInfo *info = &req.GetPreProcess(name.c_str()); + ASSERT_EQ(info->getResizeAlgorithm(), InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR); + ASSERT_PREPROCESS_INFO_EQ(preProcess, *info); + } + function.reset(); + } + +TEST_P(PreProcessTests, SetPreProcessToInferRequest) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + // Create CNNNetwork from ngrpah::Function + InferenceEngine::CNNNetwork cnnNet(function); + + auto &preProcess = cnnNet.getInputsInfo().begin()->second->getPreProcess(); + preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR); + + // Get Core from cache + auto ie = PluginCache::get().ie(); + // Load CNNNetwork to target plugins + auto execNet = ie->LoadNetwork(cnnNet, targetDevice, configuration); + // Create InferRequest + auto req = execNet.CreateInferRequest(); + InferenceEngine::ConstInputsDataMap inputsMap = execNet.GetInputsInfo(); + const auto& name = inputsMap.begin()->second->name(); + auto inputBlob = FuncTestUtils::createAndFillBlob( + cnnNet.getInputsInfo().begin()->second->getTensorDesc()); + req.SetBlob(cnnNet.getInputsInfo().begin()->first, inputBlob); + { + const InferenceEngine::PreProcessInfo *info = &req.GetPreProcess(name.c_str()); + ASSERT_EQ(cnnNet.getInputsInfo().begin()->second->getPreProcess().getResizeAlgorithm(), + info->getResizeAlgorithm()); + } + function.reset(); + } + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/src/other/add_output.cpp b/inference-engine/tests/functional/plugin/shared/src/other/add_output.cpp deleted file mode 100644 index 3c66b8d0ce6..00000000000 --- a/inference-engine/tests/functional/plugin/shared/src/other/add_output.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (C) 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -#include "other/add_output.hpp" - -// TODO: Replace IRBuilder with NGraph when it supports Memory Layer -std::string AddOutputTestsCommonClass::generate_model() { - CommonTestUtils::IRBuilder_v6 test_model_builder("model"); - - auto precision = InferenceEngine::Precision::FP32; - - auto Memory_1_layer = - test_model_builder.AddLayer("Memory_1", "Memory", precision, {{"id", "r_1-3"}, {"index", "1"}, {"size", "2"}}) - .AddOutPort({1, 200}) - .getLayer(); - auto Input_2_layer = test_model_builder.AddLayer("Input_2", "input", precision).AddOutPort({1, 
200}).getLayer(); - auto Eltwise_3_layer = test_model_builder.AddLayer("Eltwise_3", "Eltwise", precision, {{"operation", "mul"}}) - .AddInPort({1, 200}) - .AddInPort({1, 200}) - .AddOutPort({1, 200}) - .getLayer(); - - auto Activation_4_layer = - test_model_builder.AddLayer("Activation_4", "Activation", precision, {{"type", "sigmoid"}}) - .AddInPort({1, 200}) - .AddOutPort({1, 200}) - .getLayer(); - auto Memory_5_layer = - test_model_builder.AddLayer("Memory_5", "Memory", precision, {{"id", "r_1-3"}, {"index", "0"}, {"size", "2"}}) - .AddInPort({1, 200}) - .getLayer(); - - test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_3_layer.in(0)); - test_model_builder.AddEdge(Input_2_layer.out(0), Eltwise_3_layer.in(1)); - test_model_builder.AddEdge(Eltwise_3_layer.out(0), Activation_4_layer.in(0)); - test_model_builder.AddEdge(Activation_4_layer.out(0), Memory_5_layer.in(0)); - - auto serial = test_model_builder.serialize(); - - return serial; -} - -std::string AddOutputTestsCommonClass::getTestCaseName( - testing::TestParamInfo> obj) { - std::string layer; - std::string engine; - - std::tie(layer, engine) = obj.param; - return layer + "_" + engine; -} - -void AddOutputTestsCommonClass::run_test() { - std::string layer_name; - std::string engine_type; - - std::tie(layer_name, engine_type) = this->GetParam(); - - auto model = this->generate_model(); - - InferenceEngine::Core ie; - InferenceEngine::CNNNetwork network; - InferenceEngine::ExecutableNetwork executableNet; - - auto null_blob = CommonTestUtils::getWeightsBlob(0); - network = ie.ReadNetwork(model, null_blob); - network.addOutput(layer_name); - executableNet = ie.LoadNetwork(network, engine_type); - - auto outputs = executableNet.GetOutputsInfo(); - - auto layer_output = outputs[layer_name]; - - ASSERT_EQ(true, layer_output && "layer not found in outputs"); -} diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/convolution_backprop_data.cpp new file mode 100644 index 00000000000..d18cd6736b2 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/convolution_backprop_data.cpp @@ -0,0 +1,73 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include "ie_core.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" + +#include "single_layer_tests/convolution_backprop_data.hpp" + +namespace LayerTestsDefinitions { + +std::string ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo obj) { + convBackpropDataSpecificParams convBackpropDataParams; + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + std::tie(convBackpropDataParams, netPrecision, inputShapes, targetDevice) = obj.param; + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convBackpropDataParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "K" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << 
"_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void ConvolutionBackpropDataLayerTest::SetUp() { + convBackpropDataSpecificParams convBackpropDataParams; + std::vector inputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::tie(convBackpropDataParams, netPrecision, inputShape, targetDevice) = this->GetParam(); + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convBackpropDataParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + auto convBackpropData = std::dynamic_pointer_cast( + ngraph::builder::makeConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels)); + ngraph::ResultVector results{std::make_shared(convBackpropData)}; + function = std::make_shared(results, params, "convolutionBackpropData"); +} + +TEST_P(ConvolutionBackpropDataLayerTest, CompareWithRefs) { + Run(); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/cum_sum.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/cum_sum.cpp new file mode 100644 index 00000000000..ebb7d46a610 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/cum_sum.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include + +#include "ie_core.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" + +#include "single_layer_tests/cum_sum.hpp" + +namespace LayerTestsDefinitions { + +std::string CumSumLayerTest::getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision inputPrecision; + int64_t axis; + bool exclusive, reverse; + std::string targetDevice; + std::tie(inputShapes, inputPrecision, axis, exclusive, reverse, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "Precision=" << inputPrecision.name() << "_"; + result << "Axis=" << axis << "_"; + result << "Exclusive=" << (exclusive ? "TRUE" : "FALSE") << "_"; + result << "Reverse=" << (reverse ? 
"TRUE" : "FALSE") << "_"; + result << "TargetDevice=" << targetDevice; + return result.str(); +} + +void CumSumLayerTest::SetUp() { + InferenceEngine::SizeVector inputShapes; + InferenceEngine::Precision inputPrecision; + bool exclusive, reverse; + int64_t axis; + std::tie(inputShapes, inputPrecision, axis, exclusive, reverse, targetDevice) = this->GetParam(); + auto inType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); + ngraph::ParameterVector paramVector; + auto paramData = std::make_shared(inType, ngraph::Shape(inputShapes)); + paramVector.push_back(paramData); + + auto axisNode = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{}, std::vector{axis})->output(0); + + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(paramVector)); + auto cumSum = std::dynamic_pointer_cast(ngraph::builder::makeCumSum(paramOuts[0], axisNode, exclusive, reverse)); + + ngraph::ResultVector results{std::make_shared(cumSum)}; + function = std::make_shared(results, paramVector, "cumsum"); +} + +TEST_P(CumSumLayerTest, CompareWithRefs) { + Run(); +}; + +} // namespace LayerTestsDefinitions \ No newline at end of file diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/eltwise.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/eltwise.cpp new file mode 100644 index 00000000000..a92d607239b --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/eltwise.cpp @@ -0,0 +1,155 @@ +// Copyright (C) 2020 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include + +#include "common_test_utils/common_layers_params.hpp" +#include "common_test_utils/common_utils.hpp" +#include "common_test_utils/test_common.hpp" +#include "common_test_utils/test_constants.hpp" +#include "common_test_utils/xml_net_builder/ir_net.hpp" +#include "common_test_utils/xml_net_builder/xml_filler.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ie_core.hpp" +#include "single_layer_tests/eltwise.hpp" + +using namespace EltwiseTestNamespace; + +std::string EltwiseLayerTest::getTestCaseName(testing::TestParamInfo obj) { + EltwiseOpType op; + ParameterInputIdx primary_input_idx; + InputLayerType secondary_input_type; + InferenceEngine::Precision prec; + InferenceEngine::SizeVector vec; + LayerTestsUtils::TargetDevice dev; + std::map additional_config; + std::tie(op, primary_input_idx, secondary_input_type, prec, vec, dev, additional_config) = obj.param; + + std::ostringstream result; + result << "operation=" << EltwiseOpType_to_string(op) << "_"; + result << "netPRC=" << prec.name() << "_"; + result << "primaryInputIdx=" << primary_input_idx << "_"; + result << "secondaryInputType=" << InputLayerType_to_string(secondary_input_type) << "_"; + result << "inputShapes=" << CommonTestUtils::vec2str(vec) << "_"; + result << "targetDevice=" << dev; + return result.str(); +} + +void EltwiseLayerTest::SetUp() { + EltwiseOpType op; + ParameterInputIdx primary_input_idx; + InputLayerType secondary_input_type; + InferenceEngine::SizeVector inputShape; + InferenceEngine::Precision netPrecision; + ngraph::ParameterVector parameter_inputs; + std::map additional_config; + std::tie(op, primary_input_idx, secondary_input_type, netPrecision, inputShape, targetDevice, additional_config) = this->GetParam(); + configuration.insert(additional_config.begin(), 
additional_config.end()); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + std::shared_ptr input0_node; + std::shared_ptr input1_node; + auto primary_input = ngraph::builder::makeParams(ngPrc, { inputShape })[0]; + + switch (secondary_input_type) { + case InputLayerType::CONSTANT: + { + auto shape_total = 1; + for (auto dim : inputShape) { + shape_total *= dim; + } + + const float min = -10; + const float max = 10; + const float range = max - min; + const float step = range / shape_total; + + std::vector const_vec(shape_total); + for (int i = 0; i < shape_total; i++) { + const_vec[i] = min + step * i; + } + + auto const_vals = ngraph::builder::makeConstant(ngPrc, inputShape, const_vec); + parameter_inputs.push_back(primary_input); + + if (primary_input_idx == 0) { + input0_node = primary_input; + input1_node = const_vals; + } else { + input0_node = const_vals; + input1_node = primary_input; + } + break; + } + case InputLayerType::PARAMETER: + { + auto secondary_input = ngraph::builder::makeParams(ngPrc, { inputShape })[0]; + if (primary_input_idx == 0) { + parameter_inputs.push_back(primary_input); + parameter_inputs.push_back(secondary_input); + input0_node = primary_input; + input1_node = secondary_input; + } else { + parameter_inputs.push_back(secondary_input); + parameter_inputs.push_back(primary_input); + input0_node = secondary_input; + input1_node = primary_input; + } + break; + } + default: + ASSERT_EQ("unknown input type", ""); + break; + } + + std::shared_ptr ngraph_op = nullptr; + switch (op) { + case EltwiseOpType::ADD: + ngraph_op = std::make_shared(input0_node, input1_node); + break; + case EltwiseOpType::MULTIPLY: + ngraph_op = std::make_shared(input0_node, input1_node); + break; + case EltwiseOpType::SUBSTRACT: + ngraph_op = std::make_shared(input0_node, input1_node); + break; + default: + ASSERT_EQ(std::string("Unknown Eltwise operation type: ") + EltwiseOpType_to_string(op), ""); + break; + } + function = std::make_shared(ngraph_op, parameter_inputs, "Eltwise_op"); +} + +const char* EltwiseTestNamespace::InputLayerType_to_string(InputLayerType lt) { + switch (lt) { + case InputLayerType::CONSTANT: + return "CONSTANT"; + case InputLayerType::PARAMETER: + return "PARAMETER"; + default: + return "NOT_SUPPORTED_INPUT_LAYER_TYPE"; + } +} + +const char* EltwiseTestNamespace::EltwiseOpType_to_string(EltwiseOpType eOp) { + switch (eOp) { + case EltwiseOpType::ADD: + return "Sum"; + case EltwiseOpType::MULTIPLY: + return "Prod"; + case EltwiseOpType::SUBSTRACT: + return "Sub"; + default: + return "NOT_SUPPORTED_ELTWISE_OPERATION"; + } +} + +TEST_P(EltwiseLayerTest, basic) { + Run(); +} diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/equal.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/equal.cpp new file mode 100644 index 00000000000..06907239540 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/equal.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/equal.hpp" + +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "common_test_utils/common_utils.hpp" + +#include +#include +#include +#include +#include + + +namespace LayerTestsDefinitions { + +std::string EqualLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::Precision inPrecision; + InferenceEngine::Precision 
outPrecision; + std::vector inputShapes; + std::string targetDevice; + + std::tie(inputShapes, inPrecision, outPrecision, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "inPrc=" << inPrecision.name() << "_"; + result << "outPrc=" << outPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + + return result.str(); +} + +void EqualLayerTest::SetUp() { + std::vector inputShapes; + InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::UNSPECIFIED; + + std::tie(inputShapes, inputPrecision, outPrc, targetDevice) = this->GetParam(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); + auto paramsVector = ngraph::builder::makeParams(ngPrc, {inputShapes}); + IE_ASSERT(paramsVector.size() == 2); + + auto equalOp = std::make_shared(paramsVector[0], paramsVector[1]); + ngraph::ResultVector results{std::make_shared(equalOp)}; + + function = std::make_shared(results, paramsVector, "Equal"); +} + +TEST_P(EqualLayerTest, CompareWithRefs) { + Run(); + + if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) { + PluginCache::get().reset(); + } +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp new file mode 100644 index 00000000000..e16eab43fae --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/fake_quantize.cpp @@ -0,0 +1,64 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include "ie_core.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" + +#include "single_layer_tests/fake_quantize.hpp" + +namespace LayerTestsDefinitions { + +std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo obj) { + fqSpecificParams fqParams; + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + std::tie(fqParams, netPrecision, inputShapes, targetDevice) = obj.param; + size_t levels; + std::vector constShape; + std::tie(levels, constShape) = fqParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "CS=" << CommonTestUtils::vec2str(constShape) << "_"; + result << "LEVELS=" << levels << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void FakeQuantizeLayerTest::SetUp() { + fqSpecificParams fqParams; + std::vector inputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::tie(fqParams, netPrecision, inputShape, targetDevice) = this->GetParam(); + InferenceEngine::SizeVector kernel, stride, dilation; + size_t levels; + std::vector constShape; + std::tie(levels, constShape) = fqParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + auto fq = std::dynamic_pointer_cast(ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape)); + + 
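+    // makeFakeQuantize is assumed to create the FakeQuantize node together with its four
+    // input/output range constants shaped according to constShape, so {1} gives a single
+    // per-tensor range while a shape matching a data dimension gives per-channel ranges;
+    // the concrete range values are whatever the ngraph_functions builder picks by default.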
ngraph::ResultVector results{std::make_shared(fq)}; + function = std::make_shared(results, params, "fakeQuantize"); +} + +TEST_P(FakeQuantizeLayerTest, CompareWithRefs) { + Run(); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/greater.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/greater.cpp new file mode 100644 index 00000000000..49b680b9233 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/greater.cpp @@ -0,0 +1,60 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/greater.hpp" + +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/layer_test_utils.hpp" +#include "common_test_utils/common_utils.hpp" + +#include +#include +#include +#include +#include + + +namespace LayerTestsDefinitions { + +std::string GreaterLayerTest::getTestCaseName(const testing::TestParamInfo& obj) { + InferenceEngine::Precision inPrecision; + InferenceEngine::Precision outPrecision; + std::vector inputShapes; + std::string targetDevice; + + std::tie(inputShapes, inPrecision, outPrecision, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "inPrc=" << inPrecision.name() << "_"; + result << "outPrc=" << outPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + + return result.str(); +} + +void GreaterLayerTest::SetUp() { + std::vector inputShapes; + InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::UNSPECIFIED; + + std::tie(inputShapes, inputPrecision, outPrc, targetDevice) = this->GetParam(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inputPrecision); + auto paramsVector = ngraph::builder::makeParams(ngPrc, {inputShapes}); + IE_ASSERT(paramsVector.size() == 2); + + auto equalOp = std::make_shared(paramsVector[0], paramsVector[1]); + ngraph::ResultVector results{std::make_shared(equalOp)}; + + function = std::make_shared(results, paramsVector, "Greater"); +} + +TEST_P(GreaterLayerTest, CompareWithRefs) { + Run(); + + if (targetDevice == std::string{CommonTestUtils::DEVICE_GPU}) { + PluginCache::get().reset(); + } +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/group_convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/group_convolution_backprop_data.cpp new file mode 100644 index 00000000000..a3731c8f0e5 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/single_layer_tests/group_convolution_backprop_data.cpp @@ -0,0 +1,74 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include "ie_core.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" + +#include "single_layer_tests/group_convolution_backprop_data.hpp" + +namespace LayerTestsDefinitions { + +std::string GroupConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo obj) { + groupConvBackpropDataSpecificParams groupConvBackpropDataParams; + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + 
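+    // The two-level parameter tuple is unpacked in stages: first the outer layer-test
+    // params, then the group-convolution specifics, so every kernel/stride/pad/dilation/
+    // group/out-channel combination yields a distinct, filterable gtest case name.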
std::tie(groupConvBackpropDataParams, netPrecision, inputShapes, targetDevice) = obj.param; + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels, numGroups; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType) = groupConvBackpropDataParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "K" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "O=" << convOutChannels << "_"; + result << "G=" << numGroups << "_"; + result << "AP=" << padType << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void GroupConvBackpropDataLayerTest::SetUp() { + groupConvBackpropDataSpecificParams groupConvBackpropDataParams; + std::vector inputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::tie(groupConvBackpropDataParams, netPrecision, inputShape, targetDevice) = this->GetParam(); + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels, numGroups; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType) = groupConvBackpropDataParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector( + ngraph::helpers::castOps2Nodes(params)); + auto groupConvBackpropData = std::dynamic_pointer_cast( + ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, numGroups)); + ngraph::ResultVector results{std::make_shared(groupConvBackpropData)}; + function = std::make_shared(results, params, "GroupConvolutionBackpropData"); +} + +TEST_P(GroupConvBackpropDataLayerTest, CompareWithRefs) { + Run(); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/quantized_convolution_backprop_data.cpp b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/quantized_convolution_backprop_data.cpp new file mode 100644 index 00000000000..4c89df886a2 --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/quantized_convolution_backprop_data.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include "ie_core.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" + +#include "subgraph_tests/quantized_convolution_backprop_data.hpp" + +using ngraph::helpers::QuantizationGranularity; + +namespace LayerTestsDefinitions { + +std::string QuantConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo obj) { + quantConvBackpropDataSpecificParams groupConvBackpropDataParams; + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string 
targetDevice; + std::tie(groupConvBackpropDataParams, netPrecision, inputShapes, targetDevice) = obj.param; + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + size_t quantLevels; + QuantizationGranularity quantGranularity; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, quantLevels, quantGranularity) = groupConvBackpropDataParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "K" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "Levels=" << quantLevels << "_"; + result << "QG=" << quantGranularity << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void QuantConvBackpropDataLayerTest::SetUp() { + quantConvBackpropDataSpecificParams groupConvBackpropDataParams; + std::vector inputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::tie(groupConvBackpropDataParams, netPrecision, inputShape, targetDevice) = this->GetParam(); + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels; + size_t quantLevels; + QuantizationGranularity quantGranularity; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, quantLevels, quantGranularity) = groupConvBackpropDataParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + std::vector dataFqConstShapes(inputShape.size(), 1); + if (quantGranularity == ngraph::helpers::Perchannel) + dataFqConstShapes[1] = inputShape[1]; + auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, quantLevels, dataFqConstShapes); + + std::vector weightsShapes = {inputShape[1], convOutChannels}; + weightsShapes.insert(weightsShapes.end(), kernel.begin(), kernel.end()); + + std::vector weightsData; + auto weightsNode = ngraph::builder::makeConstant(ngPrc, weightsShapes, weightsData, weightsData.empty()); + + std::vector weightsFqConstShapes(weightsShapes.size(), 1); + if (quantGranularity == ngraph::helpers::Perchannel) + weightsFqConstShapes[0] = weightsShapes[0]; + + auto weightsFq = ngraph::builder::makeFakeQuantize(weightsNode, ngPrc, quantLevels, weightsFqConstShapes); + + auto convBackpropData = std::dynamic_pointer_cast( + ngraph::builder::makeConvolutionBackpropData(dataFq, weightsFq, ngPrc, stride, padBegin, padEnd, dilation, padType)); + + ngraph::ResultVector results{std::make_shared(convBackpropData)}; + function = std::make_shared(results, params, "QuantConvolutionBackpropData"); +} + +TEST_P(QuantConvBackpropDataLayerTest, CompareWithRefs) { + Run(); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/quantized_group_convolution_backprop_data.cpp 
b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/quantized_group_convolution_backprop_data.cpp new file mode 100644 index 00000000000..5274c712bab --- /dev/null +++ b/inference-engine/tests/functional/plugin/shared/src/subgraph_tests/quantized_group_convolution_backprop_data.cpp @@ -0,0 +1,104 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include "ie_core.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "functional_test_utils/layer_test_utils.hpp" + +#include "subgraph_tests/quantized_group_convolution_backprop_data.hpp" + +using ngraph::helpers::QuantizationGranularity; + +namespace LayerTestsDefinitions { + +std::string QuantGroupConvBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo obj) { + quantGroupConvBackpropDataSpecificParams groupConvBackpropDataParams; + InferenceEngine::Precision netPrecision; + InferenceEngine::SizeVector inputShapes; + std::string targetDevice; + std::tie(groupConvBackpropDataParams, netPrecision, inputShapes, targetDevice) = obj.param; + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels, numGroups; + size_t quantLevels; + QuantizationGranularity quantGranularity; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, quantLevels, quantGranularity) = groupConvBackpropDataParams; + + std::ostringstream result; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + result << "K" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "O=" << convOutChannels << "_"; + result << "G=" << numGroups << "_"; + result << "AP=" << padType << "_"; + result << "Levels=" << quantLevels << "_"; + result << "QG=" << quantGranularity << "_"; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void QuantGroupConvBackpropDataLayerTest::SetUp() { + quantGroupConvBackpropDataSpecificParams groupConvBackpropDataParams; + std::vector inputShape; + auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; + std::tie(groupConvBackpropDataParams, netPrecision, inputShape, targetDevice) = this->GetParam(); + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd; + size_t convOutChannels, numGroups; + size_t quantLevels; + QuantizationGranularity quantGranularity; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, quantLevels, quantGranularity) = groupConvBackpropDataParams; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + + std::vector dataFqConstShapes(inputShape.size(), 1); + if (quantGranularity == ngraph::helpers::Perchannel) + dataFqConstShapes[1] = inputShape[1]; + auto dataFq = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, quantLevels, dataFqConstShapes); + + 
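+    // With Perchannel granularity the activation fake-quantize ranges get one value per
+    // input channel (dataFqConstShapes[1] = inputShape[1] above); otherwise they stay at a
+    // broadcastable {1, ..., 1} shape. The weight constant built below is fake-quantized
+    // under the same rule before it feeds the group deconvolution.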
std::vector<size_t> weightsShapes = {inputShape[1], convOutChannels}; + if (weightsShapes[0] % numGroups || weightsShapes[1] % numGroups) + throw std::runtime_error("incorrect shape for QuantGroupConvolutionBackpropData"); + weightsShapes[0] /= numGroups; + weightsShapes[1] /= numGroups; + weightsShapes.insert(weightsShapes.begin(), numGroups); + weightsShapes.insert(weightsShapes.end(), kernel.begin(), kernel.end()); + + std::vector<float> weightsData; + auto weightsNode = ngraph::builder::makeConstant(ngPrc, weightsShapes, weightsData, weightsData.empty()); + + std::vector<size_t> weightsFqConstShapes(weightsShapes.size(), 1); + if (quantGranularity == ngraph::helpers::Perchannel) + weightsFqConstShapes[0] = weightsShapes[0]; + + auto weightsFq = ngraph::builder::makeFakeQuantize(weightsNode, ngPrc, quantLevels, weightsFqConstShapes); + + auto groupConvBackpropData = std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>( + ngraph::builder::makeGroupConvolutionBackpropData(dataFq, weightsFq, ngPrc, stride, padBegin, padEnd, dilation, padType)); + + ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(groupConvBackpropData)}; + function = std::make_shared<ngraph::Function>(results, params, "QuantGroupConvolutionBackpropData"); +} + +TEST_P(QuantGroupConvBackpropDataLayerTest, CompareWithRefs) { + Run(); +} +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp b/inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp index 516765ba9ea..600a7955373 100644 --- a/inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp +++ b/inference-engine/tests/ie_test_utils/common_test_utils/common_utils.hpp @@ -15,7 +15,7 @@ template inline std::string vec2str(const std::vector &vec) { std::ostringstream result; result << "("; - std::copy(vec.begin(), vec.end() - 1, std::ostream_iterator(result, ".")); + std::copy(vec.begin(), vec.end() - 1, std::ostream_iterator(result, ".")); result << vec.back() << ")"; return result.str(); } diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.cpp b/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.cpp index 00eb541fa73..cbb4857b0ab 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.cpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.cpp @@ -675,5 +675,43 @@ TestModel getConvReluNormPoolFcModel(InferenceEngine::Precision netPrc) { convWeigthsSize + convBiasesSize + fcWeigthsSize + fcBiasesSize)); } + +TestModel getModelWithMemory(InferenceEngine::Precision netPrc) { + CommonTestUtils::IRBuilder_v6 test_model_builder("model"); + + auto Memory_1_layer = + test_model_builder.AddLayer("Memory_1", "Memory", netPrc, {{"id", "r_1-3"}, + {"index", "1"}, + {"size", "2"}}) + .AddOutPort({1, 200}) + .getLayer(); + auto Input_2_layer = test_model_builder.AddLayer("Input_2", "input", netPrc).AddOutPort({1, 200}).getLayer(); + auto Eltwise_3_layer = test_model_builder.AddLayer("Eltwise_3", "Eltwise", netPrc, {{"operation", "mul"}}) + .AddInPort({1, 200}) + .AddInPort({1, 200}) + .AddOutPort({1, 200}) + .getLayer(); + + auto Activation_4_layer = + test_model_builder.AddLayer("Activation_4", "Activation", netPrc, {{"type", "sigmoid"}}) + .AddInPort({1, 200}) + .AddOutPort({1, 200}) + .getLayer(); + auto Memory_5_layer = + test_model_builder.AddLayer("Memory_5", "Memory", netPrc, {{"id", "r_1-3"}, + {"index", "0"}, + {"size", "2"}}) + .AddInPort({1, 200}) + .getLayer(); + + 
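// Wire the graph: Memory_1 and Input_2 feed Eltwise_3, whose output passes through Activation_4 and is written back into Memory_5 (paired with Memory_1 via id "r_1-3"). +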
test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_3_layer.in(0)); + test_model_builder.AddEdge(Input_2_layer.out(0), Eltwise_3_layer.in(1)); + test_model_builder.AddEdge(Eltwise_3_layer.out(0), Activation_4_layer.in(0)); + test_model_builder.AddEdge(Activation_4_layer.out(0), Memory_5_layer.in(0)); + + auto serial = test_model_builder.serialize(); + + return TestModel(serial, CommonTestUtils::getWeightsBlob(0)); +} } // namespace TestModel } // namespace FuncTestUtils \ No newline at end of file diff --git a/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.hpp b/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.hpp index 4373f07e5c8..b2224f7489b 100644 --- a/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.hpp +++ b/inference-engine/tests/ie_test_utils/functional_test_utils/test_model/test_model.hpp @@ -41,6 +41,8 @@ const TestModel convReluNormPoolFcModelFP32 = getConvReluNormPoolFcModel(Inferen const TestModel convReluNormPoolFcModelFP16 = getConvReluNormPoolFcModel(InferenceEngine::Precision::FP16); const TestModel convReluNormPoolFcModelQ78 = getConvReluNormPoolFcModel(InferenceEngine::Precision::Q78); +TestModel getModelWithMemory(InferenceEngine::Precision netPrc); + const char incorrect_input_name[] = "incorrect_input_name"; } // namespace TestModel diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp index c5185eca442..54274830b1b 100644 --- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/builders.hpp @@ -49,6 +49,55 @@ std::shared_ptr makeGroupConvolution(const ngraph::Output &i const std::vector &filterWeights = {}, const std::vector &biasesWeights = {}); +std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, + const element::Type &type, + const std::vector &filterSize, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + bool addBiases = false, + const std::vector &filterWeights = {}, + const std::vector &biasesWeights = {}); + +std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, + const ngraph::Output &weights, + const element::Type &type, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + bool addBiases = false, + const std::vector &biasesWeights = {}); + +std::shared_ptr makeGroupConvolutionBackpropData(const ngraph::Output &in, + const element::Type &type, + const std::vector &filterSize, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + size_t numGroups, + bool addBiases = false, + const std::vector &filterWeights = {}, + const std::vector &biasesWeights = {}); + +std::shared_ptr makeGroupConvolutionBackpropData(const ngraph::Output &in, + const ngraph::Output &weights, + const element::Type &type, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + bool addBiases = false, + const std::vector &biasesWeights = {}); + std::shared_ptr makeSplit(const ngraph::Output 
&in, const element::Type &type, size_t numSplits, @@ -116,5 +165,24 @@ std::shared_ptr makeProposal(const ngraph::Output &class_pro std::shared_ptr makeSelect(std::vector> &in, const ngraph::op::AutoBroadcastSpec& auto_broadcast); +std::shared_ptr makeFakeQuantize(const ngraph::Output &in, + const element::Type &type, + std::size_t levels, + std::vector constShapes, + const std::vector &inputLowData, + const std::vector &inputHighData, + const std::vector &outputLowData, + const std::vector &outputHighData); + +std::shared_ptr makeFakeQuantize(const ngraph::Output &in, + const element::Type &type, + std::size_t levels, + std::vector constShapes); + +std::shared_ptr makeCumSum(const ngraph::Output &in, + const ngraph::Output &axis, + bool exclusive, + bool reverse); + } // namespace builder } // namespace ngraph diff --git a/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp b/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp index 6619120fe34..dcc06ac3256 100644 --- a/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp +++ b/inference-engine/tests/ngraph_functions/include/ngraph_functions/utils/ngraph_helpers.hpp @@ -92,6 +92,28 @@ enum ActivationTypes { Gelu }; +enum QuantizationGranularity { + Pertensor, + Perchannel +}; + +inline std::string quantizationGranularityToString(const QuantizationGranularity& granularity) { + static std::map names = { + {Pertensor, "Pertensor"}, + {Perchannel, "Perchannel"}, + }; + + auto i = names.find(granularity); + if (i != names.end()) + return i->second; + else + throw std::runtime_error("Unsupported QuantizationGranularity type"); +} + +inline std::ostream& operator<<(std::ostream& out, const QuantizationGranularity& granularity) { + return out << quantizationGranularityToString(granularity); +} + ngraph::OutputVector convert2OutputVector(const std::vector> &nodes); template diff --git a/inference-engine/tests/ngraph_functions/src/convolution_backprop_data.cpp b/inference-engine/tests/ngraph_functions/src/convolution_backprop_data.cpp new file mode 100644 index 00000000000..dd0bdf82994 --- /dev/null +++ b/inference-engine/tests/ngraph_functions/src/convolution_backprop_data.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +// + +#include +#include + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, + const element::Type &type, + const std::vector &filterSize, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + bool addBiases, + const std::vector &filterWeights, + const std::vector &biasesWeights) { + bool randomFilterWeights = filterWeights.empty(); + auto shape = in.get_shape(); + std::vector filterWeightsShape = {shape[1], numOutChannels}; + filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); + auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); + + return makeConvolutionBackpropData(in, filterWeightsNode, type, strides, padsBegin, padsEnd, dilations, autoPad, addBiases, biasesWeights); +} + +std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in, + const ngraph::Output &weights, + const element::Type &type, + const std::vector &strides, + const 
std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + bool addBiases, + const std::vector &biasesWeights) { + auto deconv = std::make_shared(in, weights, strides, padsBegin, padsEnd, dilations, autoPad); + + if (addBiases) { + bool randomBiases = biasesWeights.empty(); + auto biasesWeightsNode = makeConstant(type, {}, biasesWeights, randomBiases); + auto add = std::make_shared(deconv, biasesWeightsNode); + return add; + } else { + return deconv; + } +} + +} // namespace builder +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/tests/ngraph_functions/src/cum_sum.cpp b/inference-engine/tests/ngraph_functions/src/cum_sum.cpp new file mode 100644 index 00000000000..3ac47f2ac16 --- /dev/null +++ b/inference-engine/tests/ngraph_functions/src/cum_sum.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr makeCumSum(const ngraph::Output &in, + const ngraph::Output &axis, + bool exclusive, + bool reverse) { + return std::make_shared(in, axis, exclusive, reverse); +} + +} // namespace builder +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp b/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp new file mode 100644 index 00000000000..aab3c67b8a0 --- /dev/null +++ b/inference-engine/tests/ngraph_functions/src/fake_quantize.cpp @@ -0,0 +1,84 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +// + +#include +#include + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr makeFakeQuantize(const ngraph::Output &in, + const element::Type &type, + std::size_t levels, + std::vector constShapes, + const std::vector &inputLowData, + const std::vector &inputHighData, + const std::vector &outputLowData, + const std::vector &outputHighData) { + auto inputLowNode = makeConstant(type, constShapes, inputLowData, inputLowData.empty()); + auto inputHighNode = makeConstant(type, constShapes, inputHighData, inputHighData.empty()); + auto outputLowNode = makeConstant(type, constShapes, outputLowData, outputLowData.empty()); + auto outputHighNode = makeConstant(type, constShapes, outputHighData, outputHighData.empty()); + + auto fq = std::make_shared(in, inputLowNode, inputHighNode, outputLowNode, outputHighNode, levels); + + return fq; +} + +std::shared_ptr makeFakeQuantize(const ngraph::Output &in, + const ngraph::element::Type &type, + std::size_t levels, + std::vector constShapes) { + size_t constDataSize = ngraph::shape_size(constShapes); + std::vector inputLowData, inputHighData, outputLowData, outputHighData; + inputLowData = NGraphFunctions::Utils::generateVector(constDataSize); + if (levels != 2) { + inputHighData = NGraphFunctions::Utils::generateVector(constDataSize); + outputLowData = NGraphFunctions::Utils::generateVector(constDataSize); + outputHighData = NGraphFunctions::Utils::generateVector(constDataSize); + } else { + inputHighData = inputLowData; + outputLowData = NGraphFunctions::Utils::generateVector(constDataSize); + outputHighData = NGraphFunctions::Utils::generateVector(constDataSize); + + for (int i = 0; i < constDataSize; i++) { + if (outputLowData[i] > outputHighData[i]) { + outputLowData[i] = 1; + outputHighData[i] = 0; + } else { + outputLowData[i] = 0; + outputHighData[i] 
= 1; + } + } + } + + for (int i = 0; i < constDataSize; i++) { + inputLowData[i] = std::min(inputLowData[i], inputHighData[i]); + inputHighData[i] = std::max(inputLowData[i], inputHighData[i]); + if (inputLowData[i] == inputHighData[i]) + inputHighData[i] += 1; + } + + for (int i = 0; i < constDataSize; i++) { + outputLowData[i] = std::min(outputLowData[i], outputHighData[i]); + outputHighData[i] = std::max(outputLowData[i], outputHighData[i]); + if (outputLowData[i] == outputHighData[i]) + outputHighData[i] += 1; + } + + auto inputLowNode = ngraph::builder::makeConstant(type, constShapes, inputLowData, inputLowData.empty()); + auto inputHighNode = ngraph::builder::makeConstant(type, constShapes, inputHighData, inputHighData.empty()); + auto outputLowNode = ngraph::builder::makeConstant(type, constShapes, outputLowData, outputLowData.empty()); + auto outputHighNode = ngraph::builder::makeConstant(type, constShapes, outputHighData, outputHighData.empty()); + + auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(in, inputLowNode, inputHighNode, outputLowNode, outputHighNode, levels); + + return fq; +} + +} // namespace builder +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/tests/ngraph_functions/src/group_convolution_backprop_data.cpp b/inference-engine/tests/ngraph_functions/src/group_convolution_backprop_data.cpp new file mode 100644 index 00000000000..9d807d0963f --- /dev/null +++ b/inference-engine/tests/ngraph_functions/src/group_convolution_backprop_data.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +// + +#include +#include + +#include "ngraph_functions/builders.hpp" + +namespace ngraph { +namespace builder { + +std::shared_ptr<Node> makeGroupConvolutionBackpropData(const ngraph::Output<Node> &in, + const element::Type &type, + const std::vector<size_t> &filterSize, + const std::vector<size_t> &strides, + const std::vector<ptrdiff_t> &padsBegin, + const std::vector<ptrdiff_t> &padsEnd, + const std::vector<size_t> &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + size_t numGroups, + bool addBiases, + const std::vector<float> &filterWeights, + const std::vector<float> &biasesWeights) { + bool randomFilterWeights = filterWeights.empty(); + auto shape = in.get_shape(); + std::vector<size_t> filterWeightsShape = {shape[1], numOutChannels}; + if (filterWeightsShape[0] % numGroups || filterWeightsShape[1] % numGroups) + throw std::runtime_error("incorrect shape for GroupConvolutionBackpropData"); + filterWeightsShape[0] /= numGroups; + filterWeightsShape[1] /= numGroups; + filterWeightsShape.insert(filterWeightsShape.begin(), numGroups); + filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); + auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); + + return makeGroupConvolutionBackpropData(in, filterWeightsNode, type, strides, padsBegin, padsEnd, dilations, autoPad, addBiases, biasesWeights); +} + +std::shared_ptr<Node> makeGroupConvolutionBackpropData(const ngraph::Output<Node> &in, + const ngraph::Output<Node> &weights, + const element::Type &type, + const std::vector<size_t> &strides, + const std::vector<ptrdiff_t> &padsBegin, + const std::vector<ptrdiff_t> &padsEnd, + const std::vector<size_t> &dilations, + const op::PadType &autoPad, + bool addBiases, + const std::vector<float> &biasesWeights) { + auto deconv = std::make_shared<ngraph::opset1::GroupConvolutionBackpropData>(in, weights, strides, padsBegin, padsEnd, dilations, autoPad); + if (addBiases) { + bool randomBiases = biasesWeights.empty(); + auto biasesWeightsNode = makeConstant(type, {}, biasesWeights, randomBiases); + auto add = 
std::make_shared(deconv, biasesWeightsNode); + return add; + } else { + return deconv; + } +} + +} // namespace builder +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp b/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp index 838b0f76eda..79b39583836 100644 --- a/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp +++ b/inference-engine/tests/ngraph_functions/src/utils/ngraph_helpers.cpp @@ -450,6 +450,37 @@ std::vector convertOutputPrecision(std::vector &outp element::Type(toPrecision).get_type_name()); } } + case element::Type_t::boolean: { + switch (toPrecision) { + case element::Type_t::u8: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::u16: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::i8: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::i16: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::i32: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::i64: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::f32: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + case element::Type_t::u64: { + return convertPrecision(output, elementsCount, element::Type(toPrecision).size()); + } + default: + throw std::runtime_error("convertOutputPrecision can't convert from: " + element::Type(fromPrecision).get_type_name() + " to: " + + element::Type(toPrecision).get_type_name()); + } + } default: throw std::runtime_error("convertOutputPrecision can't convert from: " + element::Type(fromPrecision).get_type_name() + " precision"); } diff --git a/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp b/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp index 00ade9069bb..647b32083c0 100644 --- a/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp +++ b/inference-engine/tests/unit/inference_engine/ie_blob_test.cpp @@ -4,19 +4,394 @@ #include #include +#include -using namespace InferenceEngine; +#include "unit_test_utils/mocks/mock_allocator.hpp" -using BlobTests = ::testing::Test; +#ifdef WIN32 +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +class BlobTests: public ::testing::Test { +protected: + virtual void TearDown() {} + + virtual void SetUp() {} + + std::shared_ptr createMockAllocator() { + return std::shared_ptr(new MockAllocator()); + } +}; // Testing TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0) -TEST(BlobTests, TBlobThrowsIfPtrForPreAllocatorIsNullPtr) { - ASSERT_THROW(TBlob({ Precision::FP32, {1}, C }, nullptr), - InferenceEngine::details::InferenceEngineException); +TEST_F(BlobTests, TBlobThrowsIfPtrForPreAllocatorIsNullPtr) { + ASSERT_THROW(InferenceEngine::TBlob({InferenceEngine::Precision::FP32, {1}, InferenceEngine::C}, nullptr), + InferenceEngine::details::InferenceEngineException); } -// Testing TBlob(const TensorDesc& tensorDesc, const std::shared_ptr& alloc) -TEST(BlobTests, TBlobThrowsIfAllocatorIsNullPtr) { - ASSERT_THROW(TBlob({ Precision::FP32, {1}, C }, std::shared_ptr ()), - InferenceEngine::details::InferenceEngineException); +// Testing TBlob(const TensorDesc& 
tensorDesc, const std::std::shared_ptr& alloc) +TEST_F(BlobTests, TBlobThrowsIfAllocatorIsNullPtr) { + ASSERT_THROW(InferenceEngine::TBlob( + {InferenceEngine::Precision::FP32, {1}, InferenceEngine::C}, std::shared_ptr()), + InferenceEngine::details::InferenceEngineException); +} + + +TEST_F(BlobTests, canCreateBlobUsingDefaultAllocator) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1); + + { + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + } +} + +TEST_F(BlobTests, secondAllocateWontMemLeak) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).Times(2).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(2).WillRepeatedly(testing::Return(true)); + + { + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + blob.allocate(); + } +} + + +TEST_F(BlobTests, doesNotUnlockIfLockFailed) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), lock(reinterpret_cast(1), InferenceEngine::LOCK_FOR_WRITE)).Times(1); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + { + float UNUSED *ptr = blob.data(); + } +} + +TEST_F(BlobTests, canAccessDataUsingAllocator) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + float data[] = {5.f, 6.f, 7.f}; + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), lock(reinterpret_cast(1), InferenceEngine::LOCK_FOR_WRITE)).WillRepeatedly(testing::Return(data)); + EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast(1))).Times(1); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + { + float *ptr = blob.data(); + ASSERT_EQ(ptr[2] , 7); + } +} + + +TEST_F(BlobTests, canLockReadOnlyDataForRead) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + float data[] = {5, 6, 7}; + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), lock(::testing::_, InferenceEngine::LOCK_FOR_READ)).WillRepeatedly(testing::Return(data)); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1); + EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast(1))).Times(1); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + + const float *ptr = blob.readOnly(); + ASSERT_EQ(ptr[2] , 7); +} + +TEST_F(BlobTests, 
canAccessDataUsingBufferBaseMethod) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + float data[] = {5, 6, 7}; + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), lock(::testing::_, InferenceEngine::LOCK_FOR_WRITE)).WillRepeatedly(testing::Return(data)); + EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast(1))).Times(1); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + auto buffer = blob.rwmap(); + const float *ptr = buffer.as(); + ASSERT_EQ(ptr[2] , 7); +} + +TEST_F(BlobTests, canMoveFromTBlobWithSameType) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + uint8_t data[] = {5, 6}; + + EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(uint8_t))).WillRepeatedly(testing::Return(reinterpret_cast(1))); + EXPECT_CALL(*allocator.get(), lock(::testing::_, InferenceEngine::LOCK_FOR_WRITE)).WillRepeatedly(testing::Return(data)); + EXPECT_CALL(*allocator.get(), unlock(reinterpret_cast(1))).Times(1); + EXPECT_CALL(*allocator.get(), free(::testing::_)).Times(1); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::U8, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + blob.allocate(); + + InferenceEngine::TBlob newBlob(std::move(blob)); + + auto buffer = newBlob.rwmap(); + uint8_t *ptr = buffer.as (); + ASSERT_EQ(ptr[0] , data[0]); +} + +TEST_F(BlobTests, saveDimsAndSizeAfterMove) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::U8, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + + InferenceEngine::TBlob newBlob(std::move(blob)); + + ASSERT_EQ(newBlob.size(), 1 * 2 * 3); + ASSERT_EQ(newBlob.getTensorDesc().getDims()[0], 1); + ASSERT_EQ(newBlob.getTensorDesc().getDims()[1], 2); + ASSERT_EQ(newBlob.getTensorDesc().getDims()[2], 3); +} + +TEST_F(BlobTests, canCopyBlob) { + InferenceEngine::SizeVector v = {1, 3}; + InferenceEngine::TBlob blob({ InferenceEngine::Precision::U8, v, InferenceEngine::HW }); + blob.allocate(); + blob.data()[0] = 1; + blob.data()[1] = 2; + blob.data()[2] = 3; + + InferenceEngine::TBlob blob2(blob); + + ASSERT_EQ(blob2.getTensorDesc().getDims().size(), blob.getTensorDesc().getDims().size()); + ASSERT_EQ(blob2.getTensorDesc().getDims()[0], blob.getTensorDesc().getDims()[0]); + ASSERT_EQ(blob2.getTensorDesc().getDims()[1], blob.getTensorDesc().getDims()[1]); + ASSERT_EQ(blob2.size(), blob.size()); + ASSERT_EQ(blob2.data()[0], blob.data()[0]); + ASSERT_EQ(blob2.data()[1], blob.data()[1]); + ASSERT_EQ(blob2.data()[2], blob.data()[2]); +} + +TEST_F(BlobTests, canCompareToNullPtrWithoutDereferencing) { + InferenceEngine::SizeVector v = {1, 2, 3}; + auto allocator = createMockAllocator(); + + InferenceEngine::TBlob blob({ InferenceEngine::Precision::U8, v, InferenceEngine::CHW }, + std::dynamic_pointer_cast(allocator)); + + ASSERT_TRUE(blob.readOnly() == nullptr); + ASSERT_TRUE(blob.data() == nullptr); + ASSERT_TRUE(blob.rwmap() == nullptr); + + ASSERT_TRUE(nullptr == blob.readOnly()); + ASSERT_TRUE(nullptr == blob.data()); + ASSERT_TRUE(nullptr == blob.rwmap()); +} + +TEST_F(BlobTests, canCreateBlob) { + InferenceEngine::SizeVector size = { 1, 1, 1 }; + InferenceEngine::TBlob 
blob({ InferenceEngine::Precision::FP32, size, InferenceEngine::CHW }); + ASSERT_NE(blob.size(), 0); + ASSERT_EQ(blob.rwmap(), nullptr); +} + +TEST_F(BlobTests, canAllocateBlob) { + InferenceEngine::SizeVector size = { 1, 1, 1 }; + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, size, InferenceEngine::CHW }); + blob.allocate(); + float* buffer = static_cast(blob.data()); + ASSERT_NE(buffer, nullptr); +} + +TEST_F(BlobTests, canDeallocateBlob) { + InferenceEngine::SizeVector size = { 1, 1, 1 }; + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, size, InferenceEngine::CHW }); + blob.allocate(); + blob.deallocate(); + ASSERT_EQ(nullptr, blob.data().as()); +} + +TEST_F(BlobTests, canCreateBlobWithoutDims) { + InferenceEngine::TBlob blob(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::NCHW)); + ASSERT_EQ(blob.getTensorDesc().getDims().size(), 0); +} + +TEST_F(BlobTests, canReadDataFromConstBlob) { + InferenceEngine::TBlob blob({ InferenceEngine::Precision::FP32, { 1, 1, 1 }, InferenceEngine::CHW }); + blob.allocate(); + blob.data()[0] = 1.0f; + InferenceEngine::TBlob const blob2 = blob; + const float* buf = blob2.readOnly(); + ASSERT_NE(buf, nullptr); +} + +TEST_F(BlobTests, canMakeSharedBlob) { + InferenceEngine::SizeVector size = { 1, 1, 1 }; + InferenceEngine::TBlob::Ptr blob1 = InferenceEngine::make_shared_blob( + InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::NCHW)); + InferenceEngine::TBlob::Ptr blob2 = InferenceEngine::make_shared_blob( + { InferenceEngine::Precision::FP32, size, InferenceEngine::CHW }); + InferenceEngine::TBlob::Ptr blob3 + = InferenceEngine::make_shared_blob({ InferenceEngine::Precision::FP32, { 0 }, InferenceEngine::C }); + ASSERT_EQ(blob1->size(), 0); + ASSERT_EQ(blob2->size(), 1); + ASSERT_EQ(blob3->size(), 0); +} + +TEST_F(BlobTests, cannotCreateBlobWithIncorrectPrecision) { + InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP16, {1, 3, 227, 227}, InferenceEngine::Layout::NCHW); + ASSERT_THROW(InferenceEngine::make_shared_blob(desc), InferenceEngine::details::InferenceEngineException); +} + +TEST_F(BlobTests, canUseBlobInMoveSemantics) { + InferenceEngine::TBlob b(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::C)); + + b.getTensorDesc().setDims({3}); + b.allocate(); + b.data()[0] = 1.0f; + b.data()[1] = 2.0f; + b.data()[2] = 3.0f; + + std::vector dump; + + for (const auto& e : b) { + dump.push_back(e); + } + + ASSERT_EQ(dump.size(), 3); + + ASSERT_EQ(dump[0], 1.0f); + ASSERT_EQ(dump[1], 2.0f); + ASSERT_EQ(dump[2], 3.0f); +} + +TEST_F(BlobTests, DISABLED_canUseLockedMemoryAsRvalueReference) { + std::vector dump; + std::vector v({1.0f, 2.0f, 3.0f}); + auto blob = InferenceEngine::make_shared_blob( + InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, InferenceEngine::C), &v[0], v.size()); + for (auto e : *blob) { + dump.push_back(e); + } + + ASSERT_EQ(dump.size(), 3); + + ASSERT_EQ(dump[0], 1.0f); + ASSERT_EQ(dump[1], 2.0f); + ASSERT_EQ(dump[2], 3.0f); +} + +TEST_F(BlobTests, canCreateBlobOnExistedMemory) { + float input[] = {0.1f, 0.2f, 0.3f}; + { + auto b = InferenceEngine::make_shared_blob( + InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, {1, 2}, InferenceEngine::HW), input); + auto i = b->begin(); + ASSERT_NEAR(*i, 0.1, 0.00001); + i++; + ASSERT_NEAR(*i, 0.2, 0.00001); + i++; + ASSERT_EQ(i, b->end()); + + ASSERT_EQ(&*b->begin(), input); + } +} + + +TEST_F(BlobTests, canModifyDataInRangedFor) { + 
InferenceEngine::SizeVector v = {1, 2, 3}; + InferenceEngine::TBlob blob({ InferenceEngine::Precision::I32, v, InferenceEngine::CHW }); + blob.allocate(); + + for (auto & data : blob) { + data = 5; + } + + for (int i = 0; i < v.size(); i++) { + ASSERT_EQ(5, blob.data()[i]) << "Mismatch at" << i; + } +} + +TEST_F(BlobTests, makeRoiBlobNchw) { + // we create main blob with NCHW layout. We will crop ROI from this blob. + InferenceEngine::SizeVector dims = {1, 3, 6, 5}; // RGB picture of size (WxH) = 5x6 + InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob( + InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, dims, InferenceEngine::NCHW)); + blob->allocate(); + + // create ROI blob based on the already created blob + InferenceEngine::ROI roi = {0, 2, 1, 2, 4}; // cropped picture with: id = 0, (x,y) = (2,1), sizeX (W) = 2, sizeY (H) = 4 + InferenceEngine::Blob::Ptr roiBlob = make_shared_blob(blob, roi); + + // check that BlockingDesc is constructed properly for the ROI blob + InferenceEngine::SizeVector refDims = {1, 3, 4, 2}; + InferenceEngine::SizeVector refOrder = {0, 1, 2, 3}; + size_t refOffset = 7; + InferenceEngine::SizeVector refStrides = {90, 30, 5, 1}; + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims); + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder); + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset); + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides); +} + +TEST_F(BlobTests, makeRoiBlobNhwc) { + // we create main blob with NHWC layout. We will crop ROI from this blob. + InferenceEngine::SizeVector dims = {1, 3, 4, 8}; // RGB picture of size (WxH) = 8x4 + InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob( + InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, dims, InferenceEngine::NHWC)); + blob->allocate(); + + // create ROI blob based on the already created blob + InferenceEngine::ROI roi = {0, 3, 2, 5, 2}; // cropped picture with: id = 0, (x,y) = (3,2), sizeX (W) = 5, sizeY (H) = 2 + InferenceEngine::Blob::Ptr roiBlob = make_shared_blob(blob, roi); + + // check that BlockingDesc is constructed properly for the ROI blob + InferenceEngine::SizeVector refDims = {1, 2, 5, 3}; + InferenceEngine::SizeVector refOrder = {0, 2, 3, 1}; + size_t refOffset = 57; + InferenceEngine::SizeVector refStrides = {96, 24, 3, 1}; + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims); + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder); + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset); + ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides); +} + +TEST_F(BlobTests, makeRoiBlobWrongSize) { + // we create main blob with NCHW layout. We will crop ROI from this blob. 
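+ // A 4x4 ROI placed at offset (1,1) cannot fit inside the 4x4 source picture, so constructing the ROI blob is expected to throw.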
+ InferenceEngine::SizeVector dims = {1, 3, 4, 4}; // RGB picture of size (WxH) = 4x4 + InferenceEngine::Blob::Ptr blob = InferenceEngine::make_shared_blob( + InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, dims, InferenceEngine::NCHW)); + blob->allocate(); + + // try to create ROI blob with wrong size + InferenceEngine::ROI roi = {0, 1, 1, 4, 4}; // cropped picture with: id = 0, (x,y) = (1,1), sizeX (W) = 4, sizeY (H) = 4 + ASSERT_THROW(make_shared_blob(blob, roi), InferenceEngine::details::InferenceEngineException); } diff --git a/inference-engine/tests_deprecated/unit/inference_engine_tests/blob_test.cpp b/inference-engine/tests/unit/inference_engine/ie_compound_blob_test.cpp similarity index 62% rename from inference-engine/tests_deprecated/unit/inference_engine_tests/blob_test.cpp rename to inference-engine/tests/unit/inference_engine/ie_compound_blob_test.cpp index a93420ed66f..2ffb7a0721e 100644 --- a/inference-engine/tests_deprecated/unit/inference_engine_tests/blob_test.cpp +++ b/inference-engine/tests/unit/inference_engine/ie_compound_blob_test.cpp @@ -2,42 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include #include #include -#include - -#include "unit_test_utils/mocks/mock_allocator.hpp" - -#ifdef WIN32 -#define UNUSED -#else -#define UNUSED __attribute__((unused)) -#endif - using namespace ::testing; using namespace std; using namespace InferenceEngine; -class BlobTests: public ::testing::Test { -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - } - - shared_ptr createMockAllocator() { - return shared_ptr(new MockAllocator()); - } - -public: - -}; - class CompoundBlobTests : public ::testing::Test { protected: Blob::Ptr _test_blob; @@ -75,25 +48,6 @@ public: class NV12BlobTests : public CompoundBlobTests {}; class I420BlobTests : public CompoundBlobTests {}; -struct ScopedTimer -{ - chrono::high_resolution_clock::time_point t0; - function cb; - - ScopedTimer(function callback) - : t0(chrono::high_resolution_clock::now()) - , cb(callback) - { - } - ~ScopedTimer(void) - { - auto t1 = chrono::high_resolution_clock::now(); - auto milli = chrono::duration_cast(t1-t0).count(); - - cb((int)milli); - } -}; - TEST(BlobConversionTests, canWorkWithMemoryBlob) { Blob::Ptr blob = make_shared_blob(TensorDesc(Precision::U8, {1, 3, 4, 4}, NCHW)); ASSERT_TRUE(blob->is()); @@ -182,360 +136,6 @@ TEST(BlobConversionTests, blobSharesOwnershipOnCast) { ASSERT_EQ(stored_value, tblob->data()[0]); } -TEST_F(BlobTests, canCreateBlobUsingDefaultAllocator) -{ - SizeVector v = {1,2,3}; - auto allocator = createMockAllocator(); - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1)); - EXPECT_CALL(*allocator.get(), free(_)).Times(1); - - { - TBlob blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - } -} - -TEST_F(BlobTests, secondAllocateWontMemLeak) { - SizeVector v = {1,2,3}; - auto allocator = createMockAllocator(); - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).Times(2).WillRepeatedly(Return((void*)1)); - EXPECT_CALL(*allocator.get(), free(_)).Times(2).WillRepeatedly(Return(true)); - - { - TBlob blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - blob.allocate(); - } -} - - -TEST_F(BlobTests, doesNotUnlockIfLockFailed) -{ - SizeVector v = {1,2,3}; - auto allocator = createMockAllocator(); - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1)); - 
EXPECT_CALL(*allocator.get(), lock((void*)1,LOCK_FOR_WRITE)).Times(1); - EXPECT_CALL(*allocator.get(), free(_)).Times(1); - - TBlob blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - { - float UNUSED *ptr = blob.data(); - } -} - -TEST_F(BlobTests, canAccessDataUsingAllocator) -{ - SizeVector v = {1,2,3}; - auto allocator = createMockAllocator(); - - float data[] = {5.f,6.f,7.f}; - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1)); - EXPECT_CALL(*allocator.get(), lock((void*)1, LOCK_FOR_WRITE)).WillRepeatedly(Return(data)); - EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1); - EXPECT_CALL(*allocator.get(), free(_)).Times(1); - - TBlob blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - { - float *ptr = blob.data(); - ASSERT_EQ(ptr[2] , 7); - } - -} - - -TEST_F(BlobTests, canLockReadOnlyDataForRead) -{ - SizeVector v = {1, 2, 3}; - auto allocator = createMockAllocator(); - - float data[] = {5,6,7}; - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1)); - EXPECT_CALL(*allocator.get(), lock(_,LOCK_FOR_READ)).WillRepeatedly(Return(data)); - EXPECT_CALL(*allocator.get(), free(_)).Times(1); - EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1); - - TBlob blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - - const float *ptr = blob.readOnly(); - ASSERT_EQ(ptr[2] , 7); -} - -TEST_F(BlobTests, canAccessDataUsingBufferBaseMethod) -{ - SizeVector v = {1, 2, 3}; - auto allocator = createMockAllocator(); - - float data[] = {5,6,7}; - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(float))).WillRepeatedly(Return((void*)1)); - EXPECT_CALL(*allocator.get(), lock(_,LOCK_FOR_WRITE)).WillRepeatedly(Return(data)); - EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1); - EXPECT_CALL(*allocator.get(), free(_)).Times(1); - - TBlob blob({ Precision::FP32, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - auto buffer = blob.rwmap(); - const float *ptr = buffer.as(); - ASSERT_EQ(ptr[2] , 7); -} - -TEST_F(BlobTests, canMoveFromTBlobWithSameType) -{ - SizeVector v = {1, 2, 3}; - auto allocator = createMockAllocator(); - - uint8_t data[] = {5,6}; - - EXPECT_CALL(*allocator.get(), alloc(1 * 2 * 3 * sizeof(uint8_t))).WillRepeatedly(Return((void*)1)); - EXPECT_CALL(*allocator.get(), lock(_,LOCK_FOR_WRITE)).WillRepeatedly(Return(data)); - EXPECT_CALL(*allocator.get(), unlock((void*)1)).Times(1); - EXPECT_CALL(*allocator.get(), free(_)).Times(1); - - TBlob blob({ Precision::U8, v, CHW }, dynamic_pointer_cast(allocator)); - blob.allocate(); - - TBlob newBlob(std::move(blob)); - - auto buffer = newBlob.rwmap(); - uint8_t *ptr = buffer.as (); - ASSERT_EQ(ptr[0] , data[0]); -} - -TEST_F(BlobTests, saveDimsAndSizeAfterMove) -{ - SizeVector v = {1, 2, 3}; - auto allocator = createMockAllocator(); - - TBlob blob({ Precision::U8, v, CHW }, dynamic_pointer_cast(allocator)); - - TBlob newBlob(std::move(blob)); - - ASSERT_EQ(newBlob.size(), 1 * 2 * 3); - ASSERT_EQ(newBlob.getTensorDesc().getDims()[0], 1); - ASSERT_EQ(newBlob.getTensorDesc().getDims()[1], 2); - ASSERT_EQ(newBlob.getTensorDesc().getDims()[2], 3); -} - -TEST_F(BlobTests, canCopyBlob) -{ - SizeVector v = {1, 3}; - TBlob blob({ Precision::U8, v, HW }); - blob.allocate(); - blob.data()[0] = 1; - blob.data()[1] = 2; - blob.data()[2] = 3; - - TBlob blob2(blob); - - ASSERT_EQ(blob2.getTensorDesc().getDims().size(), 
blob.getTensorDesc().getDims().size()); - ASSERT_EQ(blob2.getTensorDesc().getDims()[0], blob.getTensorDesc().getDims()[0]); - ASSERT_EQ(blob2.getTensorDesc().getDims()[1], blob.getTensorDesc().getDims()[1]); - ASSERT_EQ(blob2.size(), blob.size()); - ASSERT_EQ(blob2.data()[0], blob.data()[0]); - ASSERT_EQ(blob2.data()[1], blob.data()[1]); - ASSERT_EQ(blob2.data()[2], blob.data()[2]); -} - -TEST_F(BlobTests, canCompareToNullPtrWithoutDereferencing) { - SizeVector v = {1, 2, 3}; - auto allocator = createMockAllocator(); - - TBlob blob({ Precision::U8, v, CHW }, dynamic_pointer_cast(allocator)); - - ASSERT_TRUE(blob.readOnly() == nullptr); - ASSERT_TRUE(blob.data() == nullptr); - ASSERT_TRUE(blob.rwmap() == nullptr); - - ASSERT_TRUE(nullptr == blob.readOnly()); - ASSERT_TRUE(nullptr == blob.data()); - ASSERT_TRUE(nullptr == blob.rwmap()); -} - -TEST_F(BlobTests, canCreateBlob) { - InferenceEngine::SizeVector size = { 1, 1, 1 }; - InferenceEngine::TBlob blob({ Precision::FP32, size, CHW }); - ASSERT_NE(blob.size(), 0); - ASSERT_EQ(blob.rwmap(), nullptr); -} - -TEST_F(BlobTests, canAllocateBlob) { - InferenceEngine::SizeVector size = { 1, 1, 1 }; - InferenceEngine::TBlob blob({ Precision::FP32, size, CHW }); - blob.allocate(); - float* buffer = static_cast(blob.data()); - ASSERT_NE(buffer, nullptr); -} - -TEST_F(BlobTests, canDeallocateBlob) { - InferenceEngine::SizeVector size = { 1, 1, 1 }; - InferenceEngine::TBlob blob({ Precision::FP32, size, CHW }); - blob.allocate(); - blob.deallocate(); - ASSERT_EQ(nullptr, blob.data().as()); -} - -TEST_F(BlobTests, canCreateBlobWithoutDims) { - InferenceEngine::TBlob blob(TensorDesc(Precision::FP32, NCHW)); - ASSERT_EQ(blob.getTensorDesc().getDims().size(), 0); -} - -TEST_F(BlobTests, canReadDataFromConstBlob) { - InferenceEngine::TBlob blob({ Precision::FP32, { 1, 1, 1 }, CHW }); - blob.allocate(); - blob.data()[0] = 1.0f; - InferenceEngine::TBlob const blob2 = blob; - const float* buf = blob2.readOnly(); - ASSERT_NE(buf, nullptr); -} - -TEST_F(BlobTests, canMakeSharedBlob) { - InferenceEngine::SizeVector size = { 1, 1, 1 }; - InferenceEngine::TBlob::Ptr blob1 = InferenceEngine::make_shared_blob(TensorDesc(Precision::FP32, NCHW)); - InferenceEngine::TBlob::Ptr blob2 = InferenceEngine::make_shared_blob({ Precision::FP32, size, CHW }); - InferenceEngine::TBlob::Ptr blob3 - = InferenceEngine::make_shared_blob({ Precision::FP32, { 0 }, C }); - ASSERT_EQ(blob1->size(), 0); - ASSERT_EQ(blob2->size(), 1); - ASSERT_EQ(blob3->size(), 0); -} - -TEST_F(BlobTests, cannotCreateBlobWithIncorrectPrecision) { - InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP16, {1, 3, 227, 227}, Layout::NCHW); - ASSERT_THROW(InferenceEngine::make_shared_blob(desc), InferenceEngine::details::InferenceEngineException); -} - -TEST_F(BlobTests, canUseBlobInMoveSemantics) { - - TBlob b(TensorDesc(Precision::FP32, C)); - - b.getTensorDesc().setDims({3}); - b.allocate(); - b.data()[0] = 1.0f; - b.data()[1] = 2.0f; - b.data()[2] = 3.0f; - - std::vector dump; - - for (const auto & e: b) { - dump.push_back(e); - } - - ASSERT_EQ(dump.size(), 3); - - ASSERT_EQ(dump[0], 1.0f); - ASSERT_EQ(dump[1], 2.0f); - ASSERT_EQ(dump[2], 3.0f); - -} - -TEST_F(BlobTests, DISABLED_canUseLockedMemoryAsRvalueReference) { - - std::vector dump; - std::vector v({1.0f, 2.0f, 3.0f}); - for (auto e: *make_shared_blob(TensorDesc(Precision::FP32, C), &v[0], v.size())) { - dump.push_back(e); - } - - ASSERT_EQ(dump.size(), 3); - - ASSERT_EQ(dump[0], 1.0f); - ASSERT_EQ(dump[1], 2.0f); - ASSERT_EQ(dump[2], 
3.0f); -} - -TEST_F(BlobTests, canCreateBlobOnExistedMemory) { - - float input[] = {0.1f, 0.2f, 0.3f}; - { - auto b = make_shared_blob(TensorDesc(Precision::FP32, {1, 2}, HW), input); - auto i = b->begin(); - ASSERT_NEAR(*i, 0.1, 0.00001); - i++; - ASSERT_NEAR(*i, 0.2, 0.00001); - i++; - ASSERT_EQ(i, b->end()); - - ASSERT_EQ(&*b->begin(), input); - } -} - - -TEST_F(BlobTests, canModifyDataInRangedFor) { - - SizeVector v = {1,2,3}; - TBlob blob({ Precision::I32, v, CHW }); - blob.allocate(); - - for (auto & data : blob) { - data = 5; - } - - for(int i=0;i(TensorDesc(Precision::U8, dims, NCHW)); - blob->allocate(); - - // create ROI blob based on the already created blob - ROI roi = {0, 2, 1, 2, 4}; // cropped picture with: id = 0, (x,y) = (2,1), sizeX (W) = 2, sizeY (H) = 4 - Blob::Ptr roiBlob = make_shared_blob(blob, roi); - - // check that BlockingDesc is constructed properly for the ROI blob - SizeVector refDims = {1, 3, 4, 2}; - SizeVector refOrder = {0, 1, 2, 3}; - size_t refOffset = 7; - SizeVector refStrides = {90, 30, 5, 1}; - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims); - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder); - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset); - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides); -} - -TEST_F(BlobTests, makeRoiBlobNhwc) { - // we create main blob with NHWC layout. We will crop ROI from this blob. - SizeVector dims = {1, 3, 4, 8}; // RGB picture of size (WxH) = 8x4 - Blob::Ptr blob = make_shared_blob(TensorDesc(Precision::U8, dims, NHWC)); - blob->allocate(); - - // create ROI blob based on the already created blob - ROI roi = {0, 3, 2, 5, 2}; // cropped picture with: id = 0, (x,y) = (3,2), sizeX (W) = 5, sizeY (H) = 2 - Blob::Ptr roiBlob = make_shared_blob(blob, roi); - - // check that BlockingDesc is constructed properly for the ROI blob - SizeVector refDims = {1, 2, 5, 3}; - SizeVector refOrder = {0, 2, 3, 1}; - size_t refOffset = 57; - SizeVector refStrides = {96, 24, 3, 1}; - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getBlockDims(), refDims); - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOrder(), refOrder); - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getOffsetPadding(), refOffset); - ASSERT_EQ(roiBlob->getTensorDesc().getBlockingDesc().getStrides(), refStrides); -} - -TEST_F(BlobTests, makeRoiBlobWrongSize) { - // we create main blob with NCHW layout. We will crop ROI from this blob. 
- SizeVector dims = {1, 3, 4, 4}; // RGB picture of size (WxH) = 4x4 - Blob::Ptr blob = make_shared_blob(TensorDesc(Precision::U8, dims, NCHW)); - blob->allocate(); - - // try to create ROI blob with wrong size - ROI roi = {0, 1, 1, 4, 4}; // cropped picture with: id = 0, (x,y) = (1,1), sizeX (W) = 4, sizeY (H) = 4 - ASSERT_THROW(make_shared_blob(blob, roi), InferenceEngine::details::InferenceEngineException); -} - TEST_F(CompoundBlobTests, cannotCreateCompoundBlobFromNullptr) { Blob::Ptr valid = make_shared_blob(TensorDesc(Precision::U8, {1, 3, 4, 4}, NCHW)); EXPECT_THROW(make_shared_blob(std::vector({valid, nullptr})), @@ -769,8 +369,8 @@ TEST_F(I420BlobTests, cannotCreateI420BlobFromCompoundBlobs) { auto c_v_blob = make_cblob(v_blob); using ie_exception_t = InferenceEngine::details::InferenceEngineException; - EXPECT_THROW(make_shared_blob(c_y_blob, u_blob, v_blob ), ie_exception_t); - EXPECT_THROW(make_shared_blob(y_blob, c_u_blob, v_blob ), ie_exception_t); + EXPECT_THROW(make_shared_blob(c_y_blob, u_blob, v_blob), ie_exception_t); + EXPECT_THROW(make_shared_blob(y_blob, c_u_blob, v_blob), ie_exception_t); EXPECT_THROW(make_shared_blob(y_blob, u_blob, c_v_blob), ie_exception_t); } @@ -804,7 +404,6 @@ TEST_F(I420BlobTests, cannotCreateI420BlobFromPlanesWithWrongChannelNumber) { Blob::Ptr v_blob = make_shared_blob(TensorDesc(Precision::U8, {1, 1, 3, 4}, NHWC)); EXPECT_THROW(make_shared_blob(y_blob, u_blob, v_blob), InferenceEngine::details::InferenceEngineException); EXPECT_THROW(make_shared_blob(y_blob, v_blob, u_blob), InferenceEngine::details::InferenceEngineException); - } TEST_F(I420BlobTests, cannotCreateI420BlobFromPlanesWithWrongWidthRatio) { diff --git a/inference-engine/tests_deprecated/behavior/cldnn/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp b/inference-engine/tests_deprecated/behavior/cldnn/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp deleted file mode 100644 index 14d5d0339fc..00000000000 --- a/inference-engine/tests_deprecated/behavior/cldnn/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "behavior_test_plugin_set_preprocess.hpp" -#include "cldnn_test_data.hpp" - -INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, - BehaviorPluginTestPreProcess, - ValuesIn(supportedValues), - getTestCaseName); diff --git a/inference-engine/tests_deprecated/behavior/mkldnn/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp b/inference-engine/tests_deprecated/behavior/mkldnn/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp deleted file mode 100644 index 17369e12bf3..00000000000 --- a/inference-engine/tests_deprecated/behavior/mkldnn/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "behavior_test_plugin_set_preprocess.hpp" -#include "mkldnn_test_data.hpp" - -INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, - BehaviorPluginTestPreProcess, - ValuesIn(requestsSupportedValues), - getTestCaseName); diff --git a/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin_set_preprocess.hpp b/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin_set_preprocess.hpp deleted file mode 100644 index 
0881e51dd57..00000000000 --- a/inference-engine/tests_deprecated/behavior/shared_tests/plugin_tests/behavior_test_plugin_set_preprocess.hpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "behavior_test_plugin.h" - -using namespace std; -using namespace ::testing; -using namespace InferenceEngine; -using namespace InferenceEngine::details; - -namespace { - std::string getTestCaseName(testing::TestParamInfo obj) { - return obj.param.device + "_" + obj.param.input_blob_precision.name() - + (obj.param.config.size() ? "_" + obj.param.config.begin()->second : ""); - } -} - -TEST_P(BehaviorPluginTestPreProcess, SetPreProcessToInputInfo) { - InferenceEngine::Core core; - - CNNNetwork cnnNetwork = core.ReadNetwork(GetParam().model_xml_str, GetParam().weights_blob); - - auto &preProcess = cnnNetwork.getInputsInfo().begin()->second->getPreProcess(); - preProcess.setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR); - - InferenceEngine::IExecutableNetwork::Ptr exeNetwork; - ASSERT_NO_THROW(exeNetwork = core.LoadNetwork(cnnNetwork, GetParam().device, GetParam().config)); - - IInferRequest::Ptr inferRequest; - ASSERT_EQ(StatusCode::OK, exeNetwork->CreateInferRequest(inferRequest, &response)); - - { - ConstInputsDataMap inputsMap; - ASSERT_EQ(StatusCode::OK, exeNetwork->GetInputsInfo(inputsMap, &response)); - const auto& name = inputsMap.begin()->second->name(); - const PreProcessInfo *info; - inferRequest->GetPreProcess(name.c_str(), &info, &response); - - ASSERT_EQ(info->getResizeAlgorithm(), ResizeAlgorithm::RESIZE_BILINEAR); - ASSERT_PREPROCESS_INFO_EQ(preProcess, *info); - } -} - -TEST_P(BehaviorPluginTestPreProcess, SetPreProcessToInferRequest) { - TestEnv::Ptr testEnv; - ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv)); - ResponseDesc response; - - auto& request = testEnv->inferRequest; - PreProcessInfo preProcessInfo; - preProcessInfo.setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR); - - IInferRequest::Ptr untouched_request = testEnv->exeNetwork.CreateInferRequest(); - - ConstInputsDataMap inputs = testEnv->exeNetwork.GetInputsInfo(); - auto input_name = inputs.begin()->second->name(); - auto inputBlob = prepareInputBlob(GetParam().input_blob_precision, testEnv->inputDims); - - ASSERT_EQ(StatusCode::OK, request->SetBlob(input_name.c_str(), inputBlob, preProcessInfo, &response)); - - { - const PreProcessInfo *info = nullptr; - ASSERT_EQ(StatusCode::OK, request->GetPreProcess(input_name.c_str(), &info, &response)); - ASSERT_EQ(info->getResizeAlgorithm(), ResizeAlgorithm::RESIZE_BILINEAR); - ASSERT_PREPROCESS_INFO_EQ(preProcessInfo, *info); - } - - { - const PreProcessInfo *info = nullptr; - ASSERT_EQ(StatusCode::OK, untouched_request->GetPreProcess(input_name.c_str(), &info, &response)); - ASSERT_EQ(testEnv->network.getInputsInfo()[input_name]->getPreProcess().getResizeAlgorithm(),info->getResizeAlgorithm()); - } -} diff --git a/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/aot_behavior_tests.cpp b/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/aot_behavior_tests.cpp index 40eb1684d49..8740cd52cee 100644 --- a/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/aot_behavior_tests.cpp +++ b/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/aot_behavior_tests.cpp @@ -38,6 +38,7 @@ extern "C" void initialize_usb_boot(); class AOTBehaviorTests : public BehaviorPluginTest { public: + WatchdogHndl_t* m_watchdogHndl = nullptr; typedef 
std::chrono::high_resolution_clock Time; typedef std::chrono::milliseconds ms; @@ -48,6 +49,12 @@ class AOTBehaviorTests : public BehaviorPluginTest { void SetUp() override { initialize_usb_boot(); + + ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); + } + + void TearDown() override { + watchdog_destroy(m_watchdogHndl); } void dumpBlob() { @@ -136,10 +143,15 @@ class AOTBehaviorTests : public BehaviorPluginTest { deviceDesc.protocol = NC_ANY_PROTOCOL; deviceDesc.platform = NC_ANY_PLATFORM; - statusOpen = ncDeviceOpen(&device, deviceDesc, 1000, pathToFw); + ncDeviceOpenParams_t deviceOpenParams = {}; + deviceOpenParams.watchdogHndl = m_watchdogHndl; + deviceOpenParams.watchdogInterval = 1000; + deviceOpenParams.customFirmwareDirectory = pathToFw; + + statusOpen = ncDeviceOpen(&device, deviceDesc, deviceOpenParams); if (statusOpen != NC_OK) { - ncDeviceClose(&device); + ncDeviceClose(&device, m_watchdogHndl); return false; } @@ -174,7 +186,7 @@ TEST_P(AOTBehaviorTests, canLoadGraphWithoutPlugin) { sizeof(ElfN_Ehdr) + sizeof(mv_blob_header)); ncGraphDestroy(&graphHandle); - ncDeviceClose(&device); + ncDeviceClose(&device, m_watchdogHndl); ASSERT_EQ(NC_OK, res); } @@ -195,7 +207,7 @@ TEST_P(AOTBehaviorTests, deviceSideErrorImportingIfVersionIncorrect) { sizeof(ElfN_Ehdr) + sizeof(mv_blob_header)); ncGraphDestroy(&graphHandle); - ncDeviceClose(&device); + ncDeviceClose(&device, m_watchdogHndl); ASSERT_NE(NC_OK, res); } diff --git a/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/vpu_watchdog_tests.cpp b/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/vpu_watchdog_tests.cpp index eec34b12bbf..8340d2a9065 100644 --- a/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/vpu_watchdog_tests.cpp +++ b/inference-engine/tests_deprecated/behavior/vpu/myriad_tests/vpu_watchdog_tests.cpp @@ -37,11 +37,18 @@ extern "C" void initialize_usb_boot(); class MYRIADWatchdog : public BehaviorPluginTest, public MyriadDevicesInfo { public: + WatchdogHndl_t* m_watchdogHndl = nullptr; typedef std::chrono::high_resolution_clock Time; typedef std::chrono::milliseconds ms; void SetUp() override { initialize_usb_boot(); + + ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); + } + + void TearDown() override { + watchdog_destroy(m_watchdogHndl); } struct DevicesState { @@ -59,7 +66,7 @@ class MYRIADWatchdog : public BehaviorPluginTest, ncDeviceHandle_t *device = nullptr; void resetOneDevice() { - ncDeviceClose(&device); + ncDeviceClose(&device, m_watchdogHndl); device = nullptr; } @@ -77,10 +84,15 @@ class MYRIADWatchdog : public BehaviorPluginTest, deviceDesc.protocol = NC_ANY_PROTOCOL; deviceDesc.platform = NC_ANY_PLATFORM; - statusOpen = ncDeviceOpen(&device, deviceDesc, watchdogInterval, pathToFw); + ncDeviceOpenParams_t deviceOpenParams = {}; + deviceOpenParams.watchdogHndl = m_watchdogHndl; + deviceOpenParams.watchdogInterval = watchdogInterval; + deviceOpenParams.customFirmwareDirectory = pathToFw; + + statusOpen = ncDeviceOpen(&device, deviceDesc, deviceOpenParams); if (statusOpen != NC_OK) { - ncDeviceClose(&device); + ncDeviceClose(&device, m_watchdogHndl); } } }; @@ -175,7 +187,7 @@ TEST_P(MYRIADWatchdog, watchDogIntervalDefault) { ExecutableNetwork ret; ctime = Time::now(); ASSERT_THROW(ret = core.LoadNetwork(network, GetParam().device, { - {KEY_LOG_LEVEL, LOG_DEBUG}}), + {KEY_LOG_LEVEL, LOG_INFO}}), InferenceEngine::details::InferenceEngineException); ASSERT_BOOTED_DEVICES_ONE_MORE(); @@ -208,7 +220,7 @@ TEST_P(MYRIADWatchdog, canTurnoffWatchDogViaConfig) { 
ExecutableNetwork ret; ctime = Time::now(); ASSERT_THROW(ret = core.LoadNetwork(network, GetParam().device, { - {KEY_LOG_LEVEL, LOG_DEBUG}, + {KEY_LOG_LEVEL, LOG_INFO}, {KEY_VPU_MYRIAD_WATCHDOG, NO}}), InferenceEngine::details::InferenceEngineException); diff --git a/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp b/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp deleted file mode 100644 index 8fa9bbbd09f..00000000000 --- a/inference-engine/tests_deprecated/behavior/vpu/shared_tests_instances/plugin_tests/behavior_test_plugin_set_preprocess.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "behavior_test_plugin_set_preprocess.hpp" -#include "vpu_test_data.hpp" - -INSTANTIATE_TEST_CASE_P(smoke_BehaviorTest, - BehaviorPluginTestPreProcess, - ValuesIn(supportedValues), - getTestCaseName); diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp index 6930beb4039..7cd7e473138 100644 --- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp +++ b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/network_tests/network_test.cpp @@ -15,18 +15,11 @@ #include "network_i8.hpp" -#define XBYAK_NO_OP_NAMES -#define XBYAK_UNDEF_JNL -#include "../../../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h" - /************************************************* * !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! * All ref values was obtained from Caffe scoring * !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! *************************************************/ -#ifndef ENABLE_MKL_DNN -#include "disable_tests.hpp" -#endif TEST_P(ModelTransformationsTest, LPT) {} diff --git a/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp b/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp index af91077932d..f4ff8e9c498 100644 --- a/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp +++ b/inference-engine/tests_deprecated/functional/shared_tests/network_tests/network_i8.hpp @@ -24,10 +24,7 @@ #include "ie_util_internal.hpp" #include "cnn_network_ngraph_impl.hpp" - -#define XBYAK_NO_OP_NAMES -#define XBYAK_UNDEF_JNL -#include "../../../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h" +#include using namespace ::testing; using namespace InferenceEngine; @@ -512,7 +509,7 @@ protected: if (transformationsParam.modelParams.referenceOutputDataWithTransformations.size() == 1) { referenceValues = transformationsParam.modelParams.referenceOutputDataWithTransformations[0]; } else { - referenceValues = Xbyak::util::Cpu().has(Xbyak::util::Cpu::tAVX512F) ? + referenceValues = InferenceEngine::with_cpu_x86_avx512f() ? 
transformationsParam.modelParams.referenceOutputDataWithTransformations[1] : transformationsParam.modelParams.referenceOutputDataWithTransformations[0]; } @@ -520,7 +517,7 @@ protected: if (transformationsParam.modelParams.referenceOutputDataWithoutTransformations.size() == 1) { referenceValues = transformationsParam.modelParams.referenceOutputDataWithoutTransformations[0]; } else { - referenceValues = Xbyak::util::Cpu().has(Xbyak::util::Cpu::tAVX512F) ? + referenceValues = InferenceEngine::with_cpu_x86_avx512f() ? transformationsParam.modelParams.referenceOutputDataWithoutTransformations[1] : transformationsParam.modelParams.referenceOutputDataWithoutTransformations[0]; } diff --git a/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_strided_slice_test.h b/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_strided_slice_test.h index 4f324e979cd..b4e8fc2d531 100644 --- a/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_strided_slice_test.h +++ b/inference-engine/tests_deprecated/functional/vpu/common/layers/myriad_layers_strided_slice_test.h @@ -26,7 +26,7 @@ struct strided_slice_test_param { }; class myriadLayersTestsStridedSlice_smoke: public myriadLayersTests_nightly, - public testing::WithParamInterface { + public testing::WithParamInterface { public: std::string model_t = R"V0G0N( @@ -241,7 +241,7 @@ TEST_P(myriadLayersTestsStridedSlice_smoke, TestsStridedSlice) { // Load network. StatusCode st = GENERAL_ERROR; ASSERT_NO_THROW(st = _vpuPluginPtr->LoadNetwork( - _exeNetwork, network, { {VPU_CONFIG_KEY(PERF_REPORT_MODE), VPU_CONFIG_VALUE(PER_STAGE)} }, + _exeNetwork, network, { {VPU_CONFIG_KEY(DETECT_NETWORK_BATCH), CONFIG_VALUE(NO)} }, &_resp)); ASSERT_EQ(StatusCode::OK, st) << _resp.msg; ASSERT_NE(_exeNetwork, nullptr) << _resp.msg; @@ -297,4 +297,8 @@ static std::vector s_stridedSliceParams = { strided_slice_test_param{ { 2, 8, 32, 32}, 4, { 0, 0, 0, 2 }, { 2, 8, 32, 32 }, { 1, 1, 1, 3 }, {}, {}, {}, {}, {}, { 2, 8, 32, 10 } }, strided_slice_test_param{ { 1, 32, 128, 128 }, 4, {0, 0, 0, 0 }, { 1, 32, 128, 128 }, { 1, 2, 4, 8 }, {}, {}, {}, {}, {}, { 1, 16, 32, 16 } }, strided_slice_test_param{ { 1, 32, 128, 128 }, 4, {0, 16, 0, 0 }, { 1, 32, 128, 128 }, {}, {}, {}, {}, {}, {}, { 1, 16, 128, 128 } }, + + strided_slice_test_param{ { 4, 1000 }, 2, { 0, 0 }, { 4, 9999 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 4, 1000 } }, + strided_slice_test_param{ { 4, 1000 }, 2, { 0, 0 }, { 4, -1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 4, 1000 } }, + strided_slice_test_param{ { 4, 1000 }, 2, { 0, 0 }, { 4, -3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 4, 998 } }, }; diff --git a/inference-engine/tests_deprecated/functional/vpu/vpu_base/myriad_layers_reference_functions.cpp b/inference-engine/tests_deprecated/functional/vpu/vpu_base/myriad_layers_reference_functions.cpp index 1aced5f00bb..16ea2aaee5c 100644 --- a/inference-engine/tests_deprecated/functional/vpu/vpu_base/myriad_layers_reference_functions.cpp +++ b/inference-engine/tests_deprecated/functional/vpu/vpu_base/myriad_layers_reference_functions.cpp @@ -1445,22 +1445,29 @@ void ref_strided_slice(const InferenceEngine::Blob::Ptr& src, _begin_mask.insert(_begin_mask.end(), num_dims - _begin_mask.size(), 1); _end_mask.insert(_end_mask.end(), num_dims - _end_mask.size(), 1); - auto clip = [](int value, int min, int max) { - return std::min(std::max(min, value), max); + const auto 
numpyIdxVectorToIdxVector = [&num_dims, &src_dims](const std::vector& values) { + std::vector convertedDims(num_dims); + for (size_t i = 0; i < num_dims; i++) { + auto value = values[i]; + if (value < 0) { + value = std::max(src_dims[i] + value + 1, 0); + } + value = std::min(src_dims[i], value); + convertedDims[i] = value; + } + + return convertedDims; }; - auto begin_dms = begin; - auto end_dms = end; + auto begin_dms = numpyIdxVectorToIdxVector(begin); + auto end_dms = numpyIdxVectorToIdxVector(end); for (size_t i = 0; i < num_dims; i++) { IE_ASSERT(_begin_mask[i] == 1 || _begin_mask[i] == 0); IE_ASSERT(_end_mask[i] == 1 || _end_mask[i] == 0); - begin_dms[i] = _begin_mask[i] ? begin[i] : 0; - begin_dms[i] = clip(begin_dms[i], 0, src_dims[i]); - - end_dms[i] = _end_mask[i] ? end[i] : src_dims[i]; - end_dms[i] = clip(end_dms[i], 0, src_dims[i]); + begin_dms[i] = _begin_mask[i] ? begin_dms[i] : 0; + end_dms[i] = _end_mask[i] ? end_dms[i] : src_dims[i]; IE_ASSERT(begin_dms[i] >= 0 && begin_dms[i] < end_dms[i]); IE_ASSERT(end_dms[i] <= src_dims[i]); diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt index 0f6a70005f2..9f614a276c8 100644 --- a/inference-engine/tests_deprecated/unit/CMakeLists.txt +++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt @@ -75,8 +75,6 @@ if (ENABLE_MKL_DNN) MKLDNN_TESTS_INCLUDE engines/mkldnn/graph/*.hpp) include_directories( ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include - ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/common - ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/cpu engines/mkldnn/graph ${CMAKE_BINARY_DIR}/include/) diff --git a/inference-engine/tests_deprecated/unit/engines/gna/fp32_non_quantized_tests.cpp b/inference-engine/tests_deprecated/unit/engines/gna/fp32_non_quantized_tests.cpp index 53947cd87da..d4bdd1492f9 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/fp32_non_quantized_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/gna/fp32_non_quantized_tests.cpp @@ -193,6 +193,17 @@ TEST_F(FP32NonQuantizedTest, CropWithOffsetPropagateForwardWithSuccessOnCPU) { .called_with_input_and_expected_output(input_data, expected_result); } +TEST_F(FP32NonQuantizedTest, CropWithOffsetAndSecondDimPropagateForwardWithSuccessOnCPU) { + std::vector input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + std::vector expected_result = {7.0, 7.0, 7.0, 7.0, 7.0, + 7.0, 7.0, 7.0, 7.0, 7.0}; + + assert_that().onInferModel(cropWithOffsetAndSecondDimModel()) + .inNotCompactMode().gna().propagate_forward().onCPU() + .called_with_input_and_expected_output(input_data, expected_result); +} + TEST_F(FP32NonQuantizedTest, CropWithMaxOffsetPropagateForwardWithSuccessOnCPU) { std::vector input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; @@ -368,7 +379,8 @@ TEST_F(FP32NonQuantizedTest, TI1PropagateForwardWithoutScaleShift) { .called_with_input(input_data).equals_to(expected_result1).equals_to(expected_result2); } -TEST_F(FP32NonQuantizedTest, TI1AlignedPropagateForward) { +// DISABLED DUE TO (31901) +TEST_F(FP32NonQuantizedTest, DISABLED_TI1AlignedPropagateForward) { std::vector input_data(32, 0.1f); std::vector expected_result1(32, 0.25883245); std::vector expected_result2(12, 0.59515548f); @@ -378,7 +390,8 @@ TEST_F(FP32NonQuantizedTest, TI1AlignedPropagateForward) { 
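
The `ref_strided_slice` change above replaces the plain clip with numpy-style normalization of negative begin/end indices, which is what the new `{4, -1}` and `{4, -3}` test parameters exercise. The standalone function below restates the added lambda under the assumption that plain `int` indices are sufficient for illustration; the function name is hypothetical.

```cpp
#include <algorithm>
#include <vector>

// Restates the numpyIdxVectorToIdxVector lambda: negative values count from the
// end of the dimension (-1 keeps the full extent, -3 drops the last two elements),
// then everything is clipped to [0, srcDim]. For a dimension of 1000 this yields
// 1000 for -1 and 998 for -3, matching the new strided-slice cases.
std::vector<int> normalizeSliceIndices(const std::vector<int>& values,
                                       const std::vector<int>& srcDims) {
    std::vector<int> converted(values.size());
    for (size_t i = 0; i < values.size(); ++i) {
        int value = values[i];
        if (value < 0) {
            value = std::max(srcDims[i] + value + 1, 0);  // e.g. 1000 + (-3) + 1 = 998
        }
        converted[i] = std::min(srcDims[i], value);       // never past the dimension size
    }
    return converted;
}
```
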
.called_with_input(input_data).equals_to(expected_result1).And().equals_to(expected_result2); } -TEST_F(FP32NonQuantizedTest, TI3AlignedPropagateForward) { +// DISABLED DUE TO (31901) +TEST_F(FP32NonQuantizedTest, DISABLED_TI3AlignedPropagateForward) { std::vector input_data(96, 0.1f); std::vector expected_result1(32, 0.42592844f); std::vector expected_result2(12, 0.97069889f); diff --git a/inference-engine/tests_deprecated/unit/engines/gna/test_irs.cpp b/inference-engine/tests_deprecated/unit/engines/gna/test_irs.cpp index f02c111766c..f34fe2668a1 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/test_irs.cpp +++ b/inference-engine/tests_deprecated/unit/engines/gna/test_irs.cpp @@ -2980,6 +2980,60 @@ std::string cropWithOffsetModel() { )V0G0N"; } +std::string cropWithOffsetAndSecondDimModel() { + return R"V0G0N( + + + + + + 1 + 20 + + + + + + + + 1 + 20 + + + + + 1 + 10 + + + + + + + + + + 1 + 10 + + + + + 1 + 10 + + + + + + + + + +)V0G0N"; +} + + std::string cropWithMaxOffsetModel() { return R"V0G0N( @@ -4734,10 +4788,10 @@ std::string TIModelWithLSTMCell1WithoutScaleShift() { 32 - - - - + + + + @@ -4868,7 +4922,7 @@ std::string TIModelWithLSTMCell1WithoutScaleShift() { - + diff --git a/inference-engine/tests_deprecated/unit/engines/gna/test_irs.hpp b/inference-engine/tests_deprecated/unit/engines/gna/test_irs.hpp index 5d52e1a8ccb..bd0c995903e 100644 --- a/inference-engine/tests_deprecated/unit/engines/gna/test_irs.hpp +++ b/inference-engine/tests_deprecated/unit/engines/gna/test_irs.hpp @@ -43,6 +43,7 @@ std::string TFSoftsignUnfoldedModel(); std::string cropWithoutOffsetModel(); std::string cropWithAlignedOffsetModel(); std::string cropWithOffsetModel(); +std::string cropWithOffsetAndSecondDimModel(); std::string cropWithMaxOffsetModel(); std::string cropWithOffsetExtendedModel(); std::string twoCropsModel(); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp index 99d1212ef17..9c91c9cdc64 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/constant_propagation_test.cpp @@ -2,14 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include #include #include -#include -#include #include "graph/test_graph.hpp" +#include using namespace ::testing; diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp index 7e986aba103..f4c20b088ef 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/broadcast_tests.cpp @@ -2,10 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp index b12376aa96e..3f0a9a9f6c8 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/bucketize_tests.cpp @@ -2,15 +2,10 
@@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp index 18703e86867..5cbfc981d49 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp index df2d3c5e247..cd1839f5438 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp @@ -7,7 +7,6 @@ #include #include #include -#include "nodes/list.hpp" #include "nodes/base.hpp" using namespace InferenceEngine; @@ -92,7 +91,7 @@ class FakeExtensions : public IExtension { } }; - class FakeLayerPLNImpl: public Cpu::ExtLayerBase { +class FakeLayerPLNImpl: public Cpu::ExtLayerBase { public: explicit FakeLayerPLNImpl(const CNNLayer* layer) { try { diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp index d2d5bc9e24e..944f724251c 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp index bb5e607a8d0..68d28f26706 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp @@ -2,10 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp index 4f4b00c9dc5..132bbbcf705 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp @@ -2,15 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "mkldnn_graph.h" - #include 
"test_graph.hpp" #include #include #include -#include #include "tests_common.hpp" #include "unit_test_utils/mocks/mock_error_listener.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp index 9d5c1902abd..e7e900a7d01 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp @@ -2,14 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp index 32bf55eae13..4a103227b7a 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/log_softmax_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp index 570abc94a0e..930fea530ce 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/math_tests.cpp @@ -2,13 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // - -#include "common_test_utils/data_utils.hpp" -#include "mkldnn_graph.h" #include "test_graph.hpp" +#include "common_test_utils/data_utils.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp index 42aa7d5c9cf..ee0643916a8 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp @@ -2,30 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include "ir_gen_helper.hpp" #include #include -#include +#include using namespace InferenceEngine; using namespace ::testing; using namespace std; -using namespace mkldnn; using namespace single_layer_tests; using namespace Extensions; using namespace ::Cpu; -using namespace mkldnn::impl; struct mvn_test_params { vector dims; @@ -416,7 +409,7 @@ public: InferenceEngine::Precision precision = data_desc.getPrecision(); Layout layout; if (is_blocked) { - int blk_size = cpu::mayiuse(cpu::avx512_common) ? 16 : 8; + int blk_size = InferenceEngine::with_cpu_x86_avx512f() ? 
16 : 8; std::vector blocks = data_dims; std::vector order(blocks.size()); @@ -446,8 +439,6 @@ public: } }; -REG_FACTORY_FOR(Cpu::ImplFactory, FakeLayer_MVN); - class MKLDNNCPUExtMVNTests_Blocked: public TestsCommon, public WithParamInterface { std::string layers_t = R"V0G0N( @@ -534,7 +525,16 @@ protected: ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); MKLDNNGraphTestClass graph; - graph.CreateGraph(network); + auto manager = std::make_shared(); + { + auto defaultExt = std::make_shared(); + defaultExt->AddExt("FakeLayer_MVN", + [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { + return new Cpu::ImplFactory(layer); + }); + manager->AddExtension(defaultExt); + } + graph.CreateGraph(network, manager); auto& nodes = graph.getNodes(); nodes = graph.getNodes(); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp index 9643fa814fb..063033cca71 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/non_max_suppression_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp index de0caf8c198..f8063517593 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/normalize_tests.cpp @@ -2,32 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "common_test_utils/data_utils.hpp" -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" + +#include "common_test_utils/data_utils.hpp" #include "ir_gen_helper.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include #include -#include +#include using namespace InferenceEngine; using namespace ::testing; using namespace std; -using namespace mkldnn; using namespace single_layer_tests; using namespace Extensions; using namespace ::Cpu; -using namespace mkldnn::impl; struct normalize_test_params { struct { @@ -367,7 +361,7 @@ public: InferenceEngine::Precision precision = data_desc.getPrecision(); Layout layout; if (is_blocked) { - int blk_size = cpu::mayiuse(cpu::avx512_common) ? 16 : 8; + int blk_size = InferenceEngine::with_cpu_x86_avx512f() ? 
16 : 8; std::vector blocks = data_dims; std::vector order(blocks.size()); @@ -397,8 +391,6 @@ public: } }; -REG_FACTORY_FOR(Cpu::ImplFactory, FakeLayer_Normalize); - class MKLDNNCPUExtNormalizeTests_Blocked: public TestsCommon, public WithParamInterface { std::string model_t = R"V0G0N( @@ -516,7 +508,16 @@ protected: ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr)); MKLDNNGraphTestClass graph; - graph.CreateGraph(network); + auto manager = std::make_shared(); + { + auto defaultExt = std::make_shared(); + defaultExt->AddExt("FakeLayer_Normalize", + [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { + return new Cpu::ImplFactory(layer); + }); + manager->AddExtension(defaultExt); + } + graph.CreateGraph(network, manager); auto& nodes = graph.getNodes(); nodes = graph.getNodes(); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp index 9c22a2afed8..3dca9308d33 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/onehot_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include "single_layer_common.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp index 0a55c5388b4..8faef838f06 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp index c172334d431..121294e42ab 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reduce_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp index def0f7dfe51..e46a14b7f31 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp @@ -2,29 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include "ir_gen_helper.hpp" #include #include -#include using namespace 
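
Several of the hunks above drop the direct Xbyak CPU query in favor of the `InferenceEngine::with_cpu_x86_*` runtime checks. The sketch below shows how those helpers end up being used in these tests; both helper names are taken from the diff, but the header that declares them is elided there, so it is deliberately not named here and the snippet is not standalone.

```cpp
namespace {

int pickBlockSize() {
    // Blocked-layout tests choose the channel block size from the detected ISA.
    return InferenceEngine::with_cpu_x86_avx512f() ? 16 : 8;
}

bool canExpectWinograd() {
    // The convolution test additionally requires the AVX-512 "core" subset
    // before it expects a jit_avx512_winograd implementation to be selected.
    return InferenceEngine::with_cpu_x86_avx512f() &&
           InferenceEngine::with_cpu_x86_avx512_core();
}

}  // namespace
```
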
InferenceEngine; using namespace ::testing; using namespace std; -using namespace mkldnn; using namespace single_layer_tests; using namespace Extensions; using namespace ::Cpu; -using namespace mkldnn::impl; struct resample_test_params { std::vector in_dims; diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp index 926d8f2eb96..4219168c096 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp index 1ab0823779c..5a5413853a8 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/scatter_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp index a154be8b9c3..16638f05d45 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/select_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp index ce4a30c6e97..16a38d61ef6 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp index 7510958a262..33400465e0b 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_fill_empty_rows_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // 
-#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp index adde6ece6f2..b545bd79064 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_segment_reduce_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp index cb820b4728c..755d28d33f3 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_to_dense_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp index 3da2b0cbcfe..9516b9d6200 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/sparse_weighted_reduce_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp index 0a3e12112c9..f1e231b24e1 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp index 75a53e7e094..2e89f20de76 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/topk_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include 
"test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp index 962af31d239..962515cf636 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/extensions/unique_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp index 2a235b71ad9..a9c1db717d3 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp @@ -2,13 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp index b7edba06ff6..8f409cd85c5 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp @@ -2,11 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp index 2faade07eac..750f952f760 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include #include #include "tests_common.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp index 2e5d388b414..d44cc2d0dd6 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include #include #include diff --git 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp index b81239450c7..3102143798c 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp @@ -2,21 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include #include "tests_common.hpp" #include - -#define XBYAK_NO_OP_NAMES -#define XBYAK_UNDEF_JNL -#include "../../../../../../../thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h" +#include using namespace InferenceEngine; using namespace ::testing; @@ -278,11 +270,8 @@ protected: p.comp.at(j)(node->getSupportedPrimitiveDescriptors().at(j)); } ASSERT_NE(nullptr, node->getSelectedPrimitiveDescriptor()); - Xbyak::util::Cpu cpu; - if (cpu.has(Xbyak::util::Cpu::tAVX512F) - && cpu.has(Xbyak::util::Cpu::tAVX512BW) - && cpu.has(Xbyak::util::Cpu::tAVX512VL) - && cpu.has(Xbyak::util::Cpu::tAVX512DQ) + if (InferenceEngine::with_cpu_x86_avx512f() && + InferenceEngine::with_cpu_x86_avx512_core() && !p.preferTypes.empty() && p.preferTypes[0] == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd) { isWino = true; diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp index d06993d0060..60d6b1b02cd 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp index d85b94c13a7..738f0068a08 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "ir_gen_helper.hpp" #include "tests_common.hpp" diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp index 2c115859312..57170941a48 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp @@ -2,13 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include #include diff --git 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp index 4e4dfbf927d..1c311cb3d06 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp @@ -6,12 +6,10 @@ #define NOMINMAX #endif -#include -#include "common_test_utils/data_utils.hpp" -#include "mkldnn_graph.h" - #include "test_graph.hpp" +#include +#include "common_test_utils/data_utils.hpp" #include "single_layer_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp index 5c4da1c7f5b..b940a2694bb 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp index 53b9c05d011..de920b21353 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp index 27ea6569ad3..ee69f83594a 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp @@ -2,13 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp index 169b27de475..17838b3447d 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp @@ -2,15 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include -#include "mkldnn_graph.h" -#include "mkldnn_exec_network.h" -#include - #include "test_graph.hpp" +#include +#include "mkldnn_exec_network.h" +#include #include #include diff --git 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp index b6a65d6895b..3ecbb67598b 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp index 577222f1326..21d588768ec 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include @@ -97,8 +91,6 @@ public: } }; -REG_FACTORY_FOR(Cpu::ImplFactory, FakeLayer_permute); - static std::string precToStr (Precision prec) { return prec == Precision::I8 ? "I8" : "FP32"; } @@ -260,7 +252,16 @@ protected: ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr())); MKLDNNGraphTestClass graph; - graph.CreateGraph(network); + auto manager = std::make_shared(); + { + auto defaultExt = std::make_shared(); + defaultExt->AddExt("FakeLayer_permute", + [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { + return new Cpu::ImplFactory(layer); + }); + manager->AddExtension(defaultExt); + } + graph.CreateGraph(network, manager); auto& nodes = graph.getNodes(); for (int i = 0; i < nodes.size(); i++) { if (nodes[i]->getType() == MKLDNNPlugin::Permute) { @@ -556,9 +557,18 @@ protected: InferenceEngine::StatusCode sts = implNet->setBatchSizeReshape(MB, &resp); ASSERT_EQ((int)InferenceEngine::StatusCode::OK, sts) << resp.msg; + auto manager = std::make_shared(); + { + auto defaultExt = std::make_shared(); + defaultExt->AddExt("FakeLayer_permute", + [](const CNNLayer* layer) -> InferenceEngine::ILayerImplFactory* { + return new Cpu::ImplFactory(layer); + }); + manager->AddExtension(defaultExt); + } MKLDNNGraphTestClass graph; graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}}); - graph.CreateGraph(network); + graph.CreateGraph(network, manager); InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob({InferenceEngine::Precision::FP32, p.dims, InferenceEngine::TensorDesc::getLayoutByDims(p.dims)}); src->allocate(); diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp index a9ccb11b1b7..062fc1dd6e2 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp @@ -6,18 +6,11 @@ #define NOMINMAX #endif -#include - -#include -#include -#include 
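
The MVN, Normalize, and Permute test changes above all follow the same shape: the global `REG_FACTORY_FOR` macro is removed and each test instead registers its fake-layer factory with an extension manager that is passed to `CreateGraph`. The concrete manager and extension types are elided in the diff, so the sketch below uses purely hypothetical names to illustrate the design choice of a per-test, name-keyed factory registry rather than global registration.

```cpp
#include <functional>
#include <map>
#include <memory>
#include <string>

struct FakeFactory {};  // stand-in for the real layer-implementation factory

using FactoryFn = std::function<std::unique_ptr<FakeFactory>(const std::string& layerType)>;

// Hypothetical registry: the graph under test asks it for a factory whenever it
// meets a layer type it does not implement itself.
class LocalExtensionManager {
public:
    void AddExt(const std::string& layerType, FactoryFn factory) {
        factories_[layerType] = std::move(factory);
    }

    std::unique_ptr<FakeFactory> Create(const std::string& layerType) const {
        auto it = factories_.find(layerType);
        if (it == factories_.end()) {
            return nullptr;  // unknown type: fall back to built-in nodes
        }
        return it->second(layerType);
    }

private:
    std::map<std::string, FactoryFn> factories_;
};
```

Keeping the registration local to each test avoids order-dependent global state and makes it explicit which fake layers a given graph is allowed to resolve.
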
"mkldnn_graph.h" - #include "test_graph.hpp" +#include #include "single_layer_common.hpp" #include -#include -#include #include "tests_common.hpp" #include "ir_gen_helper.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp index f382a2e33f0..7c1a92b85fb 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp index a234a2de906..4fcea93cc4a 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp index d7c1139fe49..f47d245540d 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp @@ -2,12 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp index 7ffadd25a21..ea2695ce71a 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp @@ -2,14 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" -#include "single_layer_common.hpp" #include "test_graph.hpp" -#include +#include "single_layer_common.hpp" #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp index 01ef525daf9..b1b87db2cb5 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include 
"tests_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp index e7ea4f2cc5b..c1860f09b13 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp @@ -2,15 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include #include "single_layer_common.hpp" -#include #include "tests_common.hpp" using namespace ::testing; diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp index af696c07262..3d80459220f 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp index f1e4138d0ca..237a70d6a0f 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp index 4fb88dc74a8..cf856581879 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp @@ -2,15 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include -#include #include "tests_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp index 73f02e71ecc..c1c4c87820a 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_concat_tests.cpp @@ -2,13 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include "ir_gen_helper.hpp" #include diff --git 
a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp index 7dddde6ed34..3752e32c5a3 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp @@ -2,14 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/data_utils.hpp" -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" +#include "common_test_utils/data_utils.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp index 8768b78756b..6c500cd419f 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tests.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include "ir_gen_helper.hpp" #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp index 5a81b3e7aa8..e96cb0af41f 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp @@ -2,14 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" - #include "test_graph.hpp" #include "single_layer_common.hpp" -#include #include "tests_common.hpp" #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp index e6764badb30..2f14f18770a 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp @@ -2,16 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include -#include "mkldnn_graph.h" +#include "../test_graph.hpp" #include "single_layer_common.hpp" -#include #include #include "tests_common.hpp" #include -#include "../test_graph.hpp" using namespace ::testing; diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp index 598a4f2a62f..e5d2c3bc287 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp @@ -2,15 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "../test_graph.hpp" #include "mkldnn_exec_network.h" -#include #include "tests_common.hpp" -#include 
"../test_graph.hpp" -#include #include -#include #include diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp index 8c4a8fb2f24..815f82defdf 100644 --- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp +++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp @@ -4,8 +4,21 @@ #pragma once -#include +// WA for windows.h +#ifdef _WIN32 +# ifndef NOMINMAX +# define NOMINMAX +# endif +# ifndef _WINSOCKAPI_ +# define _WINSOCKAPI_ +# endif +# ifndef _WINSOCK2API_ +# define _WINSOCK2API_ +# endif +#endif + #include +#include #include #include #include diff --git a/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp b/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp index 3d331635055..a307c295106 100644 --- a/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp +++ b/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/watchdog_tests.cpp @@ -7,136 +7,93 @@ #include #include #include -#include #include using namespace ::testing; using namespace InferenceEngine; +using ms = std::chrono::milliseconds; + class MockWatchdogDevice : public Watchdog::IDevice { public: using time_point = Watchdog::IDevice::time_point; - MOCK_QUALIFIED_METHOD1(setInterval, noexcept, void(const std::chrono::milliseconds)); MOCK_QUALIFIED_METHOD1(keepAlive, noexcept, void(const time_point &)); MOCK_QUALIFIED_METHOD1(dueIn, const noexcept, std::chrono::milliseconds (const time_point ¤t_time)); MOCK_QUALIFIED_METHOD0(isTimeout, const noexcept, bool ()); MOCK_QUALIFIED_METHOD0(getHandle, const noexcept, void* ()); }; -struct wd_context_opaque_private { - void * magic = reinterpret_cast (0xdeadbeaf); - Watchdog::IDevice * actual = nullptr; - bool destroyed = false; -}; - - class MVNCWatchdogTests: public TestsCommon { protected: - devicePrivate_t d; - wd_context ctx, ctx1; + WatchdogHndl_t* m_watchdogHndl = nullptr; + WdDeviceHndl_t deviceHndl, deviceHndl1; StrictMock mockWatchee, mockWatchee1; - wd_context_opaque_private opaque, opaque1; void SetUp() override { - opaque.actual = &mockWatchee; - ctx.opaque = &opaque; + deviceHndl.m_device = &mockWatchee; + deviceHndl1.m_device = &mockWatchee1; - opaque1.actual = &mockWatchee1; - ctx1.opaque = &opaque1; - - pthread_mutex_init(&d.dev_stream_m, nullptr); + ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); } + void TearDown() override { - pthread_mutex_destroy(&d.dev_stream_m); + watchdog_destroy(m_watchdogHndl); + } + + void setExpectations(StrictMock& mock){ + EXPECT_CALL(mock, keepAlive(_)).Times(AtLeast(0)); + EXPECT_CALL(mock, dueIn(_)).WillRepeatedly(Return(ms(20000))); + EXPECT_CALL(mock, isTimeout()).WillRepeatedly(Return(false)); + EXPECT_CALL(mock, getHandle()).WillRepeatedly(Return(&mock)); } }; -using ms = std::chrono::milliseconds; TEST_F(MVNCWatchdogTests, canRegisterExternalWatchee) { + setExpectations(mockWatchee); - int handle = 1; - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); - // do not expect that any ping happened before we remove the thread - // this can be changed for example registering succeed only if first ping succeed - EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(0)); - EXPECT_CALL(mockWatchee, setInterval(ms(1))).Times(1); - EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false)); - EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Return(ms(20000))); - - d.wd_interval = 
1; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); // allowing thread spin std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); + + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } -// TODO: implement logic -TEST_F(MVNCWatchdogTests, DISABLED_removeDeviceIfXLINKSessionNotIninitialized) { - - d.wd_interval = 10; - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); -} - -#if defined(__APPLE__) && !defined(NDEBUG) -TEST_F(MVNCWatchdogTests, DISABLED_canNotBeRegisteredTwice) { -#else TEST_F(MVNCWatchdogTests, canNotBeRegisteredTwice) { -#endif + setExpectations(mockWatchee); - d.wd_interval = 10; + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); + ASSERT_NE(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); // allowing thread spin std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } -TEST_F(MVNCWatchdogTests, canUnRegisterNotInitialized) { +TEST_F(MVNCWatchdogTests, canNotUnRegisterNotInitialized) { + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); + ASSERT_NE(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } -TEST_F(MVNCWatchdogTests, canUnRegisterIfInterval0) { - - d.wd_interval = 0; - - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); -} - -#if defined(__APPLE__) && !defined(NDEBUG) -TEST_F(MVNCWatchdogTests, DISABLED_failUnRegisterTwice) { -#else TEST_F(MVNCWatchdogTests, failUnRegisterTwice) { -#endif + setExpectations(mockWatchee); - d.wd_interval = 10; + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); - ASSERT_EQ(WD_ERRNO, watchdog_init_context(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); // allowing thread spin std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); - ASSERT_NE(WD_ERRNO, watchdog_unregister_device(&ctx)); + + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); + ASSERT_NE(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) { - int handle = 1; int x = 0; int y = 0; int z = 0; - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(1)); - EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Invoke([&z, &y]() { // will sleep at least 100 ms and avoid second keep alive call y = 100; @@ -151,9 +108,8 @@ TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) { return std::chrono::milliseconds(y); })); - 
EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&mockWatchee1)); EXPECT_CALL(mockWatchee1, keepAlive(_)).Times(AtLeast(2)); - EXPECT_CALL(mockWatchee1, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee1, isTimeout()).WillRepeatedly(Invoke([&x]() { // allow every second time to wait x = x == 0 ? 100 : 0; @@ -163,201 +119,134 @@ TEST_F(MVNCWatchdogTests, canRemoveOneDeviceFromQueueInCaseOfTimeout) { return std::chrono::milliseconds(x); })); - - d.wd_interval = 10; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx1, &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl1)); std::this_thread::sleep_for(ms(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx1)); -} - -TEST_F(MVNCWatchdogTests, canNotStartWatchdogIfIntervalInvalid) { - - opaque.actual = &mockWatchee; - - int handle = 1; - - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); - - d.wd_interval = 0; - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - - d.wd_interval = -1; - ASSERT_NE(WD_ERRNO, watchdog_register_device(&ctx, &d)); - - // if fo some reason thread started we will get unxpected updatePongInterval calls - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl1)); } TEST_F(MVNCWatchdogTests, canGetPingsOnRegularBasis) { - - int handle = 1; int x = 0; - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); // since interval is small keepAlive can happen several times once EXPECT_CALL(mockWatchee, keepAlive(_)).Times(AtLeast(2)); - EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Invoke([&x](const MockWatchdogDevice::time_point &current_time){ x = x == 0 ? 
100 : 0; return std::chrono::milliseconds(x); })); - - d.wd_interval = 10; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); std::this_thread::sleep_for(ms(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); } TEST_F(MVNCWatchdogTests, canWakeUpWatchdogWhenAddAndRemoveDevice) { - - int handle = 1, handle1 = 2; - - EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&handle)); + EXPECT_CALL(mockWatchee, getHandle()).WillRepeatedly(Return(&mockWatchee)); EXPECT_CALL(mockWatchee, keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee, isTimeout()).WillRepeatedly(Return(false)); // without wake this will sleep for ever EXPECT_CALL(mockWatchee, dueIn(_)).WillRepeatedly(Return(ms(20000))); - EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&handle1)); + EXPECT_CALL(mockWatchee1, getHandle()).WillRepeatedly(Return(&mockWatchee1)); EXPECT_CALL(mockWatchee1, keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee1, setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee1, isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee1, dueIn(_)).WillRepeatedly(Return(ms(20000))); - - d.wd_interval = 10; - - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx, &d)); - + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl)); std::this_thread::sleep_for(std::chrono::milliseconds(2000)); - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx1, &d)); - + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl1)); std::this_thread::sleep_for(std::chrono::milliseconds(2000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx1)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl)); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl1)); } TEST_F(MVNCWatchdogTests, stressWatchDog) { - const int num_watchdog_device = 10; - - watchdog_init_context(nullptr); - StrictMock mockWatchee[num_watchdog_device]; - int handle[num_watchdog_device]; - wd_context ctx[num_watchdog_device]; - wd_context_opaque_private opaque[num_watchdog_device]; + WdDeviceHndl_t deviceHndl[num_watchdog_device]; for (int i = 0; i != num_watchdog_device; i++) { - handle[i] = i; - - EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i)); + EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i])); // since interval is big keepAlive happens only once EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000))); + + deviceHndl[i].m_device = &mockWatchee[i]; } - d.wd_interval = 10; - for (int k = 0; k != num_watchdog_device; k++) { - opaque[k].actual = &mockWatchee[k]; - ctx[k].opaque = &opaque[k]; - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } 
std::this_thread::sleep_for(std::chrono::milliseconds(100)); } TEST_F(MVNCWatchdogTests, stressWatchDog1) { - const int num_watchdog_device = 10; const int num_watchdog_device_half = num_watchdog_device / 2; - watchdog_init_context(nullptr); - StrictMock mockWatchee[num_watchdog_device]; - int handle[num_watchdog_device]; - wd_context ctx[num_watchdog_device]; - wd_context_opaque_private opaque[num_watchdog_device]; + WdDeviceHndl_t deviceHndl[num_watchdog_device]; for (int i = 0; i != num_watchdog_device; i++) { - handle[i] = i; - - EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i)); + EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i])); // since interval is big keepAlive happens only once EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1); - EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000))); - } - d.wd_interval = 10; - for (int k = 0; k != num_watchdog_device; k++) { - opaque[k].actual = &mockWatchee[k]; - ctx[k].opaque = &opaque[k]; + deviceHndl[i].m_device = &mockWatchee[i]; } for (int k = 0; k != num_watchdog_device_half; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device_half; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k + num_watchdog_device_half], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k + num_watchdog_device_half])); std::this_thread::sleep_for(std::chrono::milliseconds(20)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); std::this_thread::sleep_for(std::chrono::milliseconds(20)); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device_half; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k + num_watchdog_device_half])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k + num_watchdog_device_half])); } std::this_thread::sleep_for(std::chrono::milliseconds(100)); } TEST_F(MVNCWatchdogTests, stressWatchDog2) { - const int num_watchdog_device = 30; const int num_watchdog_device_half1 = num_watchdog_device / 3; const int num_watchdog_device_half2 = 2 * num_watchdog_device / 3; - watchdog_init_context(nullptr); - StrictMock mockWatchee[num_watchdog_device]; - int handle[num_watchdog_device]; - wd_context ctx[num_watchdog_device]; - wd_context_opaque_private opaque[num_watchdog_device]; + WdDeviceHndl_t deviceHndl[num_watchdog_device]; for (int i = 0; i != num_watchdog_device; i++) { - handle[i] = i; - - EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(handle + i)); + EXPECT_CALL(mockWatchee[i], getHandle()).WillRepeatedly(Return(&mockWatchee[i])); // since interval is big keepAlive happens only once if (i >= num_watchdog_device_half2) { @@ -366,41 +255,36 @@ TEST_F(MVNCWatchdogTests, stressWatchDog2) { EXPECT_CALL(mockWatchee[i], keepAlive(_)).Times(1); } - EXPECT_CALL(mockWatchee[i], setInterval(ms(10))).Times(1); EXPECT_CALL(mockWatchee[i], isTimeout()).WillRepeatedly(Return(false)); EXPECT_CALL(mockWatchee[i], dueIn(_)).WillRepeatedly(Return(ms(20000))); - } - d.wd_interval = 10; - for (int k = 0; k != num_watchdog_device; 
k++) { - opaque[k].actual = &mockWatchee[k]; - ctx[k].opaque = &opaque[k]; + deviceHndl[i].m_device = &mockWatchee[i]; } for (int k = 0; k != num_watchdog_device_half1; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = 0; k != num_watchdog_device_half1; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(2000)); for (int k = num_watchdog_device_half1; k != num_watchdog_device_half2; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); //this might lead to UB, for example thread might restart but after that device get removed, so giving more time std::this_thread::sleep_for(std::chrono::milliseconds(1000)); - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } for (int k = num_watchdog_device_half2; k != num_watchdog_device; k++) { - ASSERT_EQ(WD_ERRNO, watchdog_register_device(&ctx[k], &d)); + ASSERT_EQ(WD_ERRNO, watchdog_register_device(m_watchdogHndl, &deviceHndl[k])); //this might lead to UB, for example thread might restart but after that device get removed, so giving more time //so our expectations for number of calls are not set for last third - ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(&ctx[k])); + ASSERT_EQ(WD_ERRNO, watchdog_unregister_device(m_watchdogHndl, &deviceHndl[k])); } std::this_thread::sleep_for(std::chrono::milliseconds(3000)); diff --git a/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/xlink_device_tests.cpp b/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/xlink_device_tests.cpp new file mode 100644 index 00000000000..369d291540f --- /dev/null +++ b/inference-engine/tests_deprecated/unit/engines/vpu/mvnc/xlink_device_tests.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +using namespace ::testing; +using namespace InferenceEngine; + +class XLinkDeviceTests: public TestsCommon {}; +class XLinkDeviceTestsWithParam: public TestsCommon, public testing::WithParamInterface {}; + +TEST_F(XLinkDeviceTests, shouldCreateXlinkDevice) { + devicePrivate_t devicePrivate = {0}; + devicePrivate.wd_interval = 1; + + WdDeviceHndl_t* deviceHndl = nullptr; + ASSERT_EQ(WD_ERRNO, xlink_device_create(&deviceHndl, &devicePrivate)); + + xlink_device_destroy(deviceHndl); +} + +TEST_P(XLinkDeviceTestsWithParam, shouldNotCreateXlinkDeviceWithInvalidInterval) { + devicePrivate_t devicePrivate = {0}; + devicePrivate.wd_interval = GetParam(); + + WdDeviceHndl_t* deviceHndl = nullptr; + ASSERT_NE(WD_ERRNO, xlink_device_create(&deviceHndl, &devicePrivate)); + + xlink_device_destroy(deviceHndl); +} + +INSTANTIATE_TEST_CASE_P(WatchdogDevice, + XLinkDeviceTestsWithParam, + testing::Values(0, -1, -WATCHDOG_MAX_PING_INTERVAL_MS)); diff --git a/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h b/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h index 8109a1bc240..2e146fb307f 100644 --- a/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h +++ 
b/inference-engine/tests_deprecated/unit/engines/vpu/myriad_tests/helpers/myriad_mvnc_stub.h @@ -20,5 +20,7 @@ public: MOCK_QUALIFIED_METHOD0(AvailableDevicesNames, const, std::vector()); MOCK_QUALIFIED_METHOD0(AvailableDevicesDesc, const, std::vector()); + MOCK_METHOD0(watchdogHndl, WatchdogHndl_t*()); + ~MvncStub() = default; }; diff --git a/inference-engine/thirdparty/CMakeLists.txt b/inference-engine/thirdparty/CMakeLists.txt index 1cb62b21a2c..f94453e060b 100644 --- a/inference-engine/thirdparty/CMakeLists.txt +++ b/inference-engine/thirdparty/CMakeLists.txt @@ -59,11 +59,11 @@ function(build_with_lto) ie_developer_export_targets(pugixml_mt) set_target_properties(pugixml_mt PROPERTIES FOLDER thirdparty) endif() + + if(ENABLE_MKL_DNN) + set(SDL_cmake_included ON) + include(mkldnn.cmake) + endif() endfunction() build_with_lto() - -if(ENABLE_MKL_DNN) - set(SDL_cmake_included ON) - include(mkldnn.cmake) -endif() diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad.cpp new file mode 100644 index 00000000000..13647c7cdf0 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad.cpp @@ -0,0 +1,392 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "convolution_kernel_b_fs_yx_fsv16_imad.h" +#include "kernel_selector_utils.h" +#include "common_tools.h" +#include +#include +#include +#include + +// +// Kernel specific constants +// +static constexpr size_t fsv = 16; +static constexpr size_t simd = 16; + +static size_t getOutBlock_X(const size_t output_size_x, const size_t stride_x, const size_t filter_size_x, const size_t dilation_x) { + // Calculate number of variables needed to hold minimum input width. + // Equation for input block width: (output_block - 1) * stride + (filter_size - 1) * dilation + 1 + // Result for one output_block gives minimum size of input width. + size_t min_in_block_size = (filter_size_x - 1) * dilation_x + 1; + // Input block is spread across sub-group, so ceil-divide by simd size. 
+ size_t min_in_block_simds = kernel_selector::CeilDiv(min_in_block_size, simd); + + size_t output_block_width = 0; + size_t max_block_size = std::min((min_in_block_simds * simd - 1 - (filter_size_x - 1) * dilation_x) / stride_x + 1, output_size_x); + + if (output_size_x <= max_block_size) + return output_size_x; + + for (size_t block = 4; block <= max_block_size; ++block) { + if (output_size_x % block == 0) + output_block_width = block; + } + if (output_block_width == 0 && output_size_x < max_block_size * 3) { + size_t min_overhang = max_block_size; + for (size_t block = 4; block <= max_block_size; ++block) { + size_t overhang = block - output_size_x % block; + if (overhang <= min_overhang) { + min_overhang = overhang; + output_block_width = block; + } + } + } + + if (output_block_width == 0) { + output_block_width = max_block_size; + } + return output_block_width; +} + +namespace kernel_selector { + +Convolution_kernel_b_fs_yx_fsv16_imad::BlockParams +Convolution_kernel_b_fs_yx_fsv16_imad::GetBlockParams(const convolution_params& params) const { + constexpr float max_reg_pressure = 0.75f; + + // TODO Investigate whether below algorithm for selecting optimal block params could be reduced to: + // 1. Enumerate possible block params as optimization space + // 2. Prune invalid params (too high register pressure, too big local memory usage) + // 3. Rank params according to some combination of: + // - compute/memory ratio + // - occupancy + // - register pressure + // - local memory usage + // 4. Select params with highest rank + + // Select optimal block width + size_t block_width = getOutBlock_X(params.output.X().v, params.stride.x, params.filterSize.x, params.dilation.x); + size_t in_block_width = (block_width - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1; + + // If possible increase features block size + size_t block_features = simd; + { + size_t tmp_block_features = simd * 2; + auto block2_params = BlockParams{ block_width, 1, tmp_block_features, in_block_width, 1, 1 }; + + bool c_mul_f = params.output.Feature().v % tmp_block_features == 0; + bool c_reg_pressure = EstimateRegPressure(params, block2_params) <= max_reg_pressure; + + if (c_mul_f && c_reg_pressure) { + block_features = tmp_block_features; + } + } + + // If not enough occupancy try to perform feature split or/and block reduction + size_t feature_slm_split = 1; + auto no_split_params = BlockParams{ block_width, 1, block_features, in_block_width, 1, 1 }; + if (EstimateOccupancy(params, no_split_params) < 1.f) { + // Temporary variables for possible reductions in block sizes + bool update_block_params = false; + size_t split_block_width = block_width; + size_t split_in_block_width = in_block_width; + size_t split_block_features = block_features; + + // Feature split requires extra registers, so check if it can be done with current block sizes + bool can_split = + EstimateRegPressure(params, BlockParams{ block_width, 1, block_features, in_block_width, 1, 2 }) <= max_reg_pressure; + // Has the occupancy reached sufficient level + bool enough_occupancy = false; + // Reductions to reduce register pressure + // Try to reduce block width to free some registers. Good compute/memory ratio will be pointless if barely any threads will run. 
+ if (!can_split && block_width != 1) { + // At most twice reduction in output block width is acceptable + for (size_t w = block_width; w >= CeilDiv(block_width, 2); w -= 1) { + size_t tmp_in_width = (w - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1; + auto dummy_split_params = BlockParams{ w, 1, block_features, tmp_in_width, 1, 2 }; + + bool c_reg_pressure = EstimateRegPressure(params, dummy_split_params) <= max_reg_pressure; + bool c_mul_x = params.output.X().v % w == 0; + + if (c_reg_pressure && c_mul_x) { + split_block_width = w; + split_in_block_width = tmp_in_width; + can_split = true; + break; + } + } + } + // Try to reduce block features. + // Done after attempting block width reduction, because bigger feature block allows more threads to write results in parallel. + if (!can_split) { + if (block_features / simd % 2 == 0) { + split_block_features = block_features / 2; + can_split = true; + } + } + // Check if previous reductions haven't improved occupancy enough + { + auto reduced_params = BlockParams{ split_block_width, 1, split_block_features, split_in_block_width, 1, 1 }; + enough_occupancy = EstimateOccupancy(params, reduced_params) >= 1.f; + update_block_params = enough_occupancy; + } + + if (can_split && !enough_occupancy) { + // TODO Try other split sizes + for (size_t split = 4; split < 5; ++split) { + auto tmp_params = BlockParams{ block_width, 1, block_features, in_block_width, 1, split }; + + bool c_ifm_mul = CeilDiv(params.weights.IFM().v, fsv) % split == 0; + bool c_slm = EstimateSLMUsage(params, tmp_params) <= 1.f; + bool c_lws = split * simd <= params.engineInfo.maxWorkGroupSize; + bool c_reg_pressure = EstimateRegPressure(params, tmp_params) <= max_reg_pressure; + bool c_occupancy = EstimateOccupancy(params, tmp_params) >= 1.f; + + if (c_ifm_mul && c_slm && c_lws && c_reg_pressure) { + feature_slm_split = split; + update_block_params = true; + enough_occupancy = c_occupancy; + } + + // slm usage and work group sizes will only grow with split, so no point in checking + if (!c_slm || !c_lws || split * fsv >= params.weights.IFM().v) + break; + } + } + // Splitting was not sufficient or couldn't be done + // Try to reduce block width if it hasn't been done before + if (!enough_occupancy && split_block_width == block_width && block_width != 1) { + // At most twice reduction in output block width is acceptable + for (size_t w = block_width; w >= CeilDiv(block_width, 2); w -= 1) { + size_t tmp_in_width = (w - 1) * params.stride.x + (params.filterSize.x - 1) * params.dilation.x + 1; + auto tmp_params = BlockParams{ w, 1, split_block_features, tmp_in_width, 1, feature_slm_split }; + + bool c_occupancy = EstimateOccupancy(params, tmp_params) >= 1.f; + bool c_mul_x = params.output.X().v % w == 0; + + if (c_mul_x) { + split_block_width = w; + split_in_block_width = tmp_in_width; + update_block_params = true; + } + // Reached enough occupancy, don't reduce further to not hurt compute/mem ratio + if (c_mul_x && c_occupancy) + break; + } + } + if (update_block_params) { + block_width = split_block_width; + in_block_width = split_in_block_width; + block_features = split_block_features; + } + } + + // Select biggest block height that fits into registers + size_t block_height = 1; + size_t in_block_height = 1; + for (size_t h = 2; h < 16; ++h) { + if (params.output.Y().v % h != 0) + continue; + + size_t tmp_in_block_height = (h - 1) * params.stride.y + (params.filterSize.y - 1) * params.dilation.y + 1; + auto tmp_params = BlockParams{ block_width, h, 
block_features, in_block_width, tmp_in_block_height, feature_slm_split }; + + bool c_reg_pressure = EstimateRegPressure(params, tmp_params) <= max_reg_pressure; + bool c_occupancy = EstimateOccupancy(params, tmp_params) >= 1.f; + bool c_slm = EstimateSLMUsage(params, tmp_params) <= 1.f; + + if (c_reg_pressure && c_occupancy && c_slm) { + block_height = h; + in_block_height = tmp_in_block_height; + } else { + break; + } + } + + return BlockParams{ block_width, block_height, block_features, in_block_width, in_block_height, feature_slm_split }; +} + +float Convolution_kernel_b_fs_yx_fsv16_imad::EstimateRegPressure(const convolution_params& params, const BlockParams& block) const { + size_t bytes_used = 0; + // accumulator + size_t accumulator_elements = block.output_block_width * block.output_block_height * block.output_block_features; + bytes_used += accumulator_elements * BytesPerElement(GetAccumulatorType(params)); + // input block + size_t input_block_elements = block.input_block_height * Align(block.input_block_width, simd) * fsv; + bytes_used += input_block_elements * BytesPerElement(params.inputs[0].GetDType()); + // weights block + size_t weights_block_elements = block.output_block_features * fsv; + bytes_used += weights_block_elements * BytesPerElement(params.weights.GetDType()); + + // Experimentally selected number of registers needed for extra variables (eg. out_x, out_y, filter_idx, etc.) + constexpr size_t experimental_extra_regs = 8 * 32; + bytes_used += experimental_extra_regs; + + // Experimentally selected number of registers needed for slm handling + constexpr size_t experimental_slm_regs = 4 * 32; + if (block.feature_slm_split != 1) { + bytes_used += experimental_slm_regs; + } + + constexpr size_t reg_num = 128; + constexpr size_t bytes_per_reg = 32; + constexpr size_t max_reg_bytes = reg_num * bytes_per_reg; + + return static_cast(bytes_used) / static_cast(max_reg_bytes); +} + +float Convolution_kernel_b_fs_yx_fsv16_imad::EstimateOccupancy(const convolution_params& params, const BlockParams& block) const { + size_t blocks_w = CeilDiv(params.output.X().v, block.output_block_width); + size_t blocks_h = CeilDiv(params.output.Y().v, block.output_block_height); + size_t blocks_f = CeilDiv(params.output.Feature().v, block.output_block_features) * block.feature_slm_split; + size_t block_b = params.output.Batch().v; + + auto threads = blocks_w * blocks_h * blocks_f * block_b; + constexpr size_t max_threads_per_cu = 7; + size_t compute_units = params.engineInfo.computeUnitsCount; + size_t max_threads = compute_units * max_threads_per_cu; + + return static_cast(threads) / static_cast(max_threads); +} + +float Convolution_kernel_b_fs_yx_fsv16_imad::EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const { + size_t slm_elements = block.output_block_width * block.output_block_height * block.output_block_features * (block.feature_slm_split - 1); + size_t slm_bytes = slm_elements * BytesPerElement(GetAccumulatorType(params)); + + // TODO Actual maximum slm should also depend on number of work-groups, but this is device specific + size_t max_slm_bytes = params.engineInfo.maxLocalMemSize; + + return static_cast(slm_bytes) / static_cast(max_slm_bytes); +} + +ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::UINT8); + + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); + k.EnableOutputDataType(Datatype::F32); + 
k.EnableOutputDataType(Datatype::F16); + + k.EnableInputWeightsType(WeightsType::INT8); + + k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); + k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); + + k.EnableDifferentTypes(); + k.EnableDifferentInputWeightsTypes(); + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBiasPerFeature(); + k.EnableNonBiasTerm(); + k.EnableBatching(); + k.EnableQuantization(QuantizationType::SYMMETRIC); + k.EnableDilation(); + k.DisableTuning(); + return k; +} + +KernelsData Convolution_kernel_b_fs_yx_fsv16_imad::GetKernelsData(const Params& params, + const optional_params& options) const { + return GetCommonKernelsData(params, options); +} + +JitConstants Convolution_kernel_b_fs_yx_fsv16_imad::GetJitConstants(const convolution_params& params, + const DispatchData& kd) const { + auto mem_consts = Parent::GetJitConstants(params, kd); + + auto block_params = GetBlockParams(params); + + bool unroll_filter_y = block_params.output_block_height != 1; + + mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block_params.output_block_width)); + mem_consts.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", block_params.input_block_width)); + mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", block_params.output_block_height)); + mem_consts.AddConstant(MakeJitConstant("IN_BLOCK_HEIGHT", block_params.input_block_height)); + mem_consts.AddConstant(MakeJitConstant("FILTER_SIZE_Y_UNROLL", unroll_filter_y ? params.filterSize.y : 1)); + mem_consts.AddConstant(MakeJitConstant("OFM_BLOCKS_PER_SIMD", block_params.output_block_features / simd)); + mem_consts.AddConstant(MakeJitConstant("OFM_SIZE_PER_SIMD", block_params.output_block_features)); + mem_consts.AddConstant(MakeJitConstant("FEATURE_SLM_SPLIT", block_params.feature_slm_split)); + mem_consts.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); + mem_consts.Merge(MakeTypeJitConstants(GetActivationType(params), "ACTIVATION")); + + if (!params.fused_ops.empty()) { + auto input_dt = GetActivationType(params); + std::vector idx_order = { "out_b", "(out_f + ofb * 16)", "(out_y + oh)", "(out_x + ow)" }; + std::vector loop_axes = { Tensor::DataChannelName::X }; + if (block_params.output_block_height != 1) { + loop_axes.push_back(Tensor::DataChannelName::Y); + } else { + idx_order[idx_order.size() - 2] = "out_y"; + } + + FusedOpsConfiguration conf_scalar = { "_SCALAR", + idx_order, + "dequantized_val", + input_dt, + 1, + LoadType::LT_UNALIGNED, + BoundaryCheck::DISABLED }; + conf_scalar.SetLoopAxes(loop_axes, true); + + mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar})); + } + + return mem_consts; +} // GetJitConstants + +ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad::SetDefault(const convolution_params& params, + int) const { + DispatchData kd; + const auto& output = params.output; + auto block_params = GetBlockParams(params); + + kd.gws0 = CeilDiv(output.X().v, block_params.output_block_width); + kd.gws1 = CeilDiv(output.Y().v, block_params.output_block_height); + kd.gws2 = output.Batch().v * CeilDiv(output.Feature().v, block_params.output_block_features) * simd * block_params.feature_slm_split; + + kd.lws0 = 1; + kd.lws1 = 1; + kd.lws2 = simd * block_params.feature_slm_split; + + kd.cldnnStyle = {0, 0, 0, 0, 0}; + kd.gemmStyle = {0, 0, 0, 0, 0, 0}; + + kd.efficiency = FORCE_PRIORITY_2; + // TODO Optimize 1x1, because this kernel is better in most cases + //if (params.filterSize.x == 1 && params.filterSize.y == 1) + // kd.efficiency = FORCE_PRIORITY_1; + 
if (static_cast(params.weights.IFM().v) / static_cast(Align(params.weights.IFM().v, fsv)) < 0.5f) + kd.efficiency = FORCE_PRIORITY_4; + + return kd; +} // SetDefault + +bool Convolution_kernel_b_fs_yx_fsv16_imad::Validate(const Params& params, const optional_params& options) const { + if (!Parent::Validate(params, options)) { + return false; + } + + KernelData kd = KernelData::Default(params); + convolution_params& newParams = *static_cast(kd.params.get()); + + if (newParams.groups != 1 || newParams.split != 1) + return false; + + return true; +} +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad.h similarity index 66% rename from inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h rename to inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad.h index d3dfeaf147b..dc3950ca128 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad.h @@ -21,11 +21,11 @@ namespace kernel_selector { -class Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks : public ConvolutionKernelBase { +class Convolution_kernel_b_fs_yx_fsv16_imad : public ConvolutionKernelBase { public: using Parent = ConvolutionKernelBase; - Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks") {} - virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks() {} + Convolution_kernel_b_fs_yx_fsv16_imad() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad") {} + virtual ~Convolution_kernel_b_fs_yx_fsv16_imad() {} KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; @@ -45,5 +45,21 @@ protected: FusedOpType::SCALE, FusedOpType::ACTIVATION }; } + + struct BlockParams { + size_t output_block_width; + size_t output_block_height; + size_t output_block_features; + + size_t input_block_width; + size_t input_block_height; + + size_t feature_slm_split; + }; + + BlockParams GetBlockParams(const convolution_params& params) const; + float EstimateRegPressure(const convolution_params& params, const BlockParams& block) const; + float EstimateOccupancy(const convolution_params& params, const BlockParams& block) const; + float EstimateSLMUsage(const convolution_params& params, const BlockParams& block) const; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.cpp deleted file mode 100644 index a1aef4f61d7..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright (c) 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3.h" -#include "kernel_selector_utils.h" -#include "common_tools.h" -#include -#include - -// -// Kernel specific constants -// -#define SIMD_SIZE 16 - -static size_t getOutBlock_X(const size_t output_size_x, const size_t stride_x, const size_t filter_size_x) { - size_t output_block_width = 0; - size_t max_block_size = std::min((SIMD_SIZE - filter_size_x) / stride_x + 1, output_size_x); - - if (output_size_x <= max_block_size) - return output_size_x; - - for (size_t block = 4; block <= max_block_size; ++block) { - if (output_size_x % block == 0) - output_block_width = block; - } - if (output_block_width == 0 && output_size_x < max_block_size * 3) { - size_t min_overhang = max_block_size; - for (size_t block = 4; block <= max_block_size; ++block) { - size_t overhang = block - output_size_x % block; - if (overhang <= min_overhang) { - min_overhang = overhang; - output_block_width = block; - } - } - } - - if (output_block_width == 0) { - output_block_width = max_block_size; - } - return output_block_width; -} - -static size_t get_ofm_per_wi(const size_t output_size_f) { - if (output_size_f % 32 == 0) - return 2; - return 1; -} - -namespace kernel_selector { - -ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::INT8); - k.EnableInputDataType(Datatype::UINT8); - - k.EnableOutputDataType(Datatype::INT8); - k.EnableOutputDataType(Datatype::UINT8); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::F16); - - k.EnableInputWeightsType(WeightsType::INT8); - - k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); - - k.EnableDifferentTypes(); - k.EnableDifferentInputWeightsTypes(); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableBiasPerFeature(); - k.EnableNonBiasTerm(); - k.EnableBatching(); - k.EnableQuantization(QuantizationType::SYMMETRIC); - k.DisableTuning(); - return k; -} - -KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetKernelsData(const Params& params, - const optional_params& options) const { - return GetCommonKernelsData(params, options); -} - -JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_3x3::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); - const auto& output = params.output; - - mem_consts.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", getOutBlock_X(output.X().v, params.stride.x, params.filterSize.x))); - mem_consts.AddConstant(MakeJitConstant("OFM_BLOCKS_PER_SIMD", get_ofm_per_wi(output.Feature().v))); - mem_consts.AddConstant(MakeJitConstant("OFM_SIZE_PER_SIMD", SIMD_SIZE * get_ofm_per_wi(output.Feature().v))); - - if (!params.fused_ops.empty()) { - auto input_dt = GetActivationType(params); - FusedOpsConfiguration conf_scalar = {"", {"out_b", "out_f + j * 16", "out_y", "out_x + i"}, "dequantized", input_dt, 1}; - conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true); - mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar})); - } 
- - return mem_consts; -} // GetJitConstants - -ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_3x3::SetDefault(const convolution_params& params, - int) const { - DispatchData kd; - const auto& output = params.output; - auto output_block_width = getOutBlock_X(output.X().v, params.stride.x, params.filterSize.x); - auto ofm_blocks_per_simd = get_ofm_per_wi(output.Feature().v); - - kd.gws0 = CeilDiv(output.X().v, output_block_width); - kd.gws1 = output.Y().v; - kd.gws2 = output.Batch().v * Align(output.Feature().v / ofm_blocks_per_simd, SIMD_SIZE); - - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = SIMD_SIZE; - - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; - - if (params.filterSize.x == 3) - kd.efficiency = FORCE_PRIORITY_2; - else - kd.efficiency = FORCE_PRIORITY_5; - - return kd; -} // SetDefault - -bool Convolution_kernel_b_fs_yx_fsv16_imad_3x3::Validate(const Params& params, const optional_params& options) const { - if (!Parent::Validate(params, options)) { - return false; - } - - KernelData kd = KernelData::Default(params); - convolution_params& newParams = *static_cast(kd.params.get()); - - if ((newParams.filterSize.x != newParams.filterSize.y) || - (newParams.filterSize.x != 3 && newParams.filterSize.x != 5)) { - // Fitler size needs to be 3x3 or 5x5 - return false; - } - - if ((newParams.stride.x != newParams.stride.y) || - (newParams.stride.x != 1 && newParams.stride.x != 2)) { - // Strides must be 1x1 or 2x2 - return false; - } - - if (newParams.groups != 1 || newParams.split != 1) - return false; - - return true; -} -} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.h deleted file mode 100644 index e69a798e4e6..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -// Copyright (c) 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-*/ - -#pragma once - -#include "convolution_kernel_base.h" -#include - -namespace kernel_selector { - -class Convolution_kernel_b_fs_yx_fsv16_imad_3x3 : public ConvolutionKernelBase { -public: - using Parent = ConvolutionKernelBase; - Convolution_kernel_b_fs_yx_fsv16_imad_3x3() : ConvolutionKernelBase("convolution_gpu_b_fs_yx_fsv16_imad_3x3") {} - virtual ~Convolution_kernel_b_fs_yx_fsv16_imad_3x3() {} - - KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; - ParamsKey GetSupportedKey() const override; - -protected: - bool Validate(const Params& params, const optional_params& options) const override; - JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override; - DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override; - bool NeedPaddedInput() const override { return true; } - WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override { - return WeightsLayout::os_is_yx_osv16_isv16; - } - - std::vector GetSupportedFusedOps() const override { - return { FusedOpType::ELTWISE, - FusedOpType::QUANTIZE, - FusedOpType::SCALE, - FusedOpType::ACTIVATION }; - } -}; -} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.cpp deleted file mode 100644 index e62348c3d29..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright (c) 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h" -#include "kernel_selector_utils.h" -#include "common_tools.h" -#include -#include - -// -// Kernel specific constants -// -#define SIMD_SIZE 16 - -static size_t getOutBlock_X(size_t output_size_x) { - auto output_block_width = 7; - if (output_size_x % 8 == 0) - output_block_width = 8; - return output_block_width; -} - - -namespace kernel_selector { - -ParamsKey Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetSupportedKey() const { - ParamsKey k; - k.EnableInputDataType(Datatype::INT8); - k.EnableInputDataType(Datatype::UINT8); - - k.EnableOutputDataType(Datatype::INT8); - k.EnableOutputDataType(Datatype::UINT8); - k.EnableOutputDataType(Datatype::F32); - k.EnableOutputDataType(Datatype::F16); - - k.EnableInputWeightsType(WeightsType::INT8); - - k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); - k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); - - k.EnableDifferentTypes(); - k.EnableDifferentInputWeightsTypes(); - k.EnableTensorOffset(); - k.EnableTensorPitches(); - k.EnableBiasPerFeature(); - k.EnableNonBiasTerm(); - k.EnableBatching(); - k.EnableQuantization(QuantizationType::SYMMETRIC); - k.DisableTuning(); - return k; -} - -KernelsData Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetKernelsData(const Params& params, - const optional_params& options) const { - return GetCommonKernelsData(params, options); -} - -JitConstants Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::GetJitConstants(const convolution_params& params, - const DispatchData& kd) const { - auto mem_consts = Parent::GetJitConstants(params, kd); - const auto& output = params.output; - - mem_consts.AddConstants({MakeJitConstant("OUT_BLOCK_WIDTH", getOutBlock_X(output.X().v))}); - - if (!params.fused_ops.empty()) { - auto input_dt = GetActivationType(params); - FusedOpsConfiguration conf_scalar = {"", - {"out_b", "(out_f + get_sub_group_id() * 16)", "out_y", "out_x + i"}, - "dequantized", - input_dt, - 1}; - conf_scalar.SetLoopAxes({ Tensor::DataChannelName::X }, true); - mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar})); - } - - return mem_consts; -} // GetJitConstants - -ConvolutionKernelBase::DispatchData Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::SetDefault( - const convolution_params& params, - int) const { - DispatchData kd; - const auto& output = params.output; - - auto output_block_width = getOutBlock_X(output.X().v); - kd.gws0 = output.X().v / output_block_width; - kd.gws1 = output.Y().v; - kd.gws2 = output.Batch().v * output.Feature().v * 2; - - kd.lws0 = 1; - kd.lws1 = 1; - kd.lws2 = SIMD_SIZE * 4; - - kd.cldnnStyle = {0, 0, 0, 0, 0}; - kd.gemmStyle = {0, 0, 0, 0, 0, 0}; - - kd.efficiency = FORCE_PRIORITY_1; - - return kd; -} // SetDefault - -bool Convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks::Validate(const Params& params, const optional_params& options) const { - if (!Parent::Validate(params, options)) { - return false; - } - - KernelData kd = KernelData::Default(params); - convolution_params& newParams = *static_cast(kd.params.get()); - - if (newParams.output.Feature().v % (2 * SIMD_SIZE) != 0) { - return false; - } - - if ((newParams.filterSize.x != newParams.filterSize.y) || - newParams.filterSize.x != 3) { - // Fitler size needs to be 3x3 - return false; - } - - if ((newParams.stride.x != newParams.stride.y) || - (newParams.stride.x != 1 && newParams.stride.x != 2)) { - // Strides must be 1x1 or 2x2 - return false; - } - - if (newParams.output.X().v % 8 != 0 && newParams.output.X().v % 7 != 0) { - return false; - } - - if 
(CeilDiv(newParams.inputs[0].Feature().v, 16) % 4 != 0) { - return false; - } - - const auto& output = newParams.output; - auto output_block_width = getOutBlock_X(output.X().v); - size_t eu_count = params.engineInfo.computeUnitsCount; - auto global_size = - (output.X().v / output_block_width) * output.Y().v * ((output.Batch().v * output.Feature().v)); - if ((global_size / 16) > (eu_count * 7)) { - return false; - } - - if (newParams.groups != 1 || newParams.split != 1) - return false; - - return true; -} -} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp index 2e0b86428e5..ec28317a4ba 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp @@ -71,8 +71,7 @@ #include "convolution_kernel_mmad_bfyx_to_b_fs_yx_fsv32.h" #include "convolution_kernel_bfyx_to_bs_fs_yx_bsv16_fsv16.h" #include "convolution_kernel_b_fs_yx_fsv16_imad_1x1.h" -#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3.h" -#include "convolution_kernel_b_fs_yx_fsv16_imad_3x3_ks.h" +#include "convolution_kernel_b_fs_yx_fsv16_imad.h" #include "convolution_kernel_b_fs_yx_fsv_16_32_imad_dw.hpp" namespace kernel_selector { @@ -82,8 +81,7 @@ convolution_kernel_selector::convolution_kernel_selector() { // b_fs_yx_fsv16 int8 Attach(); - Attach(); - Attach(); + Attach(); // b_fs_yx_fsv16 and b_fs_zyx_fsv16 Attach(); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp index c7d1c6abf21..55325dbeff4 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_b_fs_yx_fsv16_imad.cpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - #include "mvn_kernel_b_fs_yx_fsv16_imad.hpp" #include "common/common_tools.h" @@ -28,6 +27,7 @@ static constexpr size_t pref_work_groups = 16; ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const { ParamsKey k; + k.EnableInputDataType(Datatype::INT8); k.EnableInputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::F16); @@ -36,6 +36,8 @@ ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const { k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::b_fs_yx_fsv16); k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); + k.EnableInputLayout(DataLayout::b_fs_zyx_fsv16); + k.EnableOutputLayout(DataLayout::b_fs_zyx_fsv16); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableDifferentTypes(); @@ -44,6 +46,7 @@ ParamsKey MVNKernel_b_fs_yx_fsv16_imad::GetSupportedKey() const { // k.EnableMVNMode(MVNMode::ACROSS_CHANNELS); k.EnableMVNMode(MVNMode::WITHIN_CHANNELS); k.EnableMVNNormalizeVariance(); + return k; } @@ -54,7 +57,8 @@ bool MVNKernel_b_fs_yx_fsv16_imad::Validate(const Params& p, const optional_para auto params = static_cast(p); // TODO Add support for input padding via iterating over y (parallel or in kernel). 
- if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0) + if (params.inputs[0].X().pad.Total() != 0 || params.inputs[0].Y().pad.Total() != 0 || + params.inputs[0].Z().pad.Total() != 0) return false; return true; @@ -63,7 +67,7 @@ bool MVNKernel_b_fs_yx_fsv16_imad::Validate(const Params& p, const optional_para MVNKernelBase::DispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefault(const mvn_params& params) const { auto kd = Parent::SetDefault(params); - auto items_num = params.output.X().v * params.output.Y().v; + auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v; auto max_wg = params.engineInfo.maxWorkGroupSize; auto slm_per_sg = fsv * 4; auto max_slm = params.engineInfo.maxLocalMemSize; @@ -98,17 +102,31 @@ JitConstants MVNKernel_b_fs_yx_fsv16_imad::GetJitConstants(const mvn_params& par if (!params.fused_ops.empty()) { std::vector idx_order; - idx_order = { "b", "(f + set_idx)", "(output_spatial / OUTPUT_SIZE_X)", "(output_spatial % OUTPUT_SIZE_X)" }; + + if (params.inputs[0].GetDims().size() <= 4) { + idx_order = {"b", + "(f + set_idx)", + "(output_spatial / OUTPUT_SIZE_X)", + "(output_spatial % OUTPUT_SIZE_X)"}; + } else if (params.inputs[0].GetDims().size() == 5) { + idx_order = {"b", + "(f + set_idx)", + "(output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y))", + "((output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y)", + "(output_spatial % OUTPUT_SIZE_X)"}; + } + auto conf = FusedOpsConfiguration("", idx_order, "normalized", activation_dt); - jits.Merge(MakeFusedOpsJitConstants(params, { conf })); + jits.Merge(MakeFusedOpsJitConstants(params, {conf})); } return jits; } -MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti(const mvn_params& params) const { +MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::SetDefaultForMulti( + const mvn_params& params) const { MultiDispatchData md; - auto items_num = params.output.X().v * params.output.Y().v; + auto items_num = params.output.X().v * params.output.Y().v * params.output.Z().v; auto max_wg = params.engineInfo.maxWorkGroupSize; auto slm_per_sg = fsv * 4; auto max_slm = params.engineInfo.maxLocalMemSize; @@ -158,7 +176,9 @@ MVNKernel_b_fs_yx_fsv16_imad::MultiDispatchData MVNKernel_b_fs_yx_fsv16_imad::Se return md; } -KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params, const optional_params& options, float estimated_time) const { +KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_params& params, + const optional_params& options, + float estimated_time) const { if (!Validate(params, options)) return {}; @@ -190,10 +210,10 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par 0, 0); kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 }); - kd.internalBufferSizes.push_back( - params.output.Batch().v * Align(params.output.Feature().v, fsv) * runInfo.item_groups * intermidiate_bytes); + kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * + runInfo.item_groups * intermidiate_bytes); } { // Mean second stage @@ -214,9 +234,10 @@ KernelsData 
MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par 0, 0); kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 }); - kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermidiate_bytes); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * + intermidiate_bytes); } if (params.mvnNormalizeVariance) { // Variance first stage @@ -237,9 +258,9 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par 0, 0); kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 }); + kernel.arguments.push_back({ArgumentDescriptor::Types::INPUT, 0}); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); } if (params.mvnNormalizeVariance) { // Variance second stage @@ -260,9 +281,10 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par 0, 0); kernel.arguments.clear(); // Clear original output argument - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 0 }); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 2 }); - kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * intermidiate_bytes); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 0}); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); + kd.internalBufferSizes.push_back(params.output.Batch().v * Align(params.output.Feature().v, fsv) * + intermidiate_bytes); } { // Final auto cldnn_jit = GetJitConstants(orgParams, runInfo.stage_final); @@ -283,25 +305,24 @@ KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetMultiStageKernelsData(const mvn_par false, 1, GetFusedPrimitiveInputsCount(params)); - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 1 }); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 1}); if (params.mvnNormalizeVariance) { - kernel.arguments.push_back({ ArgumentDescriptor::Types::INTERNAL_BUFFER, 2 }); + kernel.arguments.push_back({ArgumentDescriptor::Types::INTERNAL_BUFFER, 2}); } } kd.intenralBufferDataType = Datatype::F32; kd.estimatedTime = estimated_time; - return { kd }; + return {kd}; } - KernelsData MVNKernel_b_fs_yx_fsv16_imad::GetKernelsData(const Params& params, const optional_params& optParams) const { const mvn_params& orgParams = static_cast(params); auto max_slm = params.engineInfo.maxLocalMemSize; auto slm_per_sg = fsv * 4; auto max_lws = params.engineInfo.maxWorkGroupSize; - auto items_num = orgParams.output.X().v * orgParams.output.Y().v; + auto items_num = orgParams.output.X().v * orgParams.output.Y().v * orgParams.output.Z().v; auto enough_slm = max_lws / simd * simd * slm_per_sg <= max_slm; auto enough_lws = max_lws / simd >= 1; diff --git 
a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_selector.h index 5fc42939649..52dbcd0f61c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_selector.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_selector.h @@ -31,4 +31,4 @@ public: KernelsData GetBestKernels(const Params& params, const optional_params& options) const override; }; -} // namespace kernel_selector \ No newline at end of file +} // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad.cl new file mode 100644 index 00000000000..25961dc8794 --- /dev/null +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad.cl @@ -0,0 +1,390 @@ +// Copyright (c) 2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "include/common.cl" +#include "include/fetch.cl" +#include "include/imad.cl" +#include "include/mmad.cl" +#include "include/data_types.cl" + +#define AS_TYPE_N_(type, n, x) as_##type##n(x) +#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x) +#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x) + +#define AS_FILTER_TYPE_4(x) AS_TYPE_N(FILTER_TYPE, 4, x) + +#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b)) +#define ALIGN(a, b) (CEIL_DIV(a, b) * (b)) + +#define SIMD 16 +#define FSV 16 + +// int8 conv_input and weights data is packed to int32 "batches", +// int/uint pointers here instead of INPUT0_TYPE/FILTER_TYPE for convenience +__attribute__((intel_reqd_sub_group_size(SIMD))) +__attribute__((reqd_work_group_size(1, 1, FEATURE_SLM_SPLIT * SIMD))) +KERNEL(convolution_gpu_b_fs_yx_fsv16_imad)( + const __global INPUT0_TYPE *conv_input, + __global OUTPUT_TYPE *output, + const __global FILTER_TYPE *weights, +#if BIAS_TERM + const __global BIAS_TYPE *biases, +#endif +#if HAS_FUSED_OPS_DECLS + FUSED_OPS_DECLS, +#endif + uint split_idx) { + + #define LUT_VALUE_CLAMP(x) (( (IN_BLOCK_WIDTH % SIMD == 0) || ((x) < IN_BLOCK_WIDTH % SIMD) ) ? 
(x) : 0) + const int tmp = LUT_VALUE_CLAMP(get_sub_group_local_id()); + #undef LUT_VALUE_CLAMP + + const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH; + const uint out_y = (uint)get_global_id(1) * OUT_BLOCK_HEIGHT; + const uint out_b = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) / ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD); + uint out_fg = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) % ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD); + uint out_f = out_fg + get_sub_group_local_id(); + + const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X; + const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y; + +#if FEATURE_SLM_SPLIT == 1 + const uint k_start = 0; +#else + const uint k_start = get_sub_group_id() * FSV; +#endif + + uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, k_start, 0, 0); + const uint filter_idx_diff = (ALIGN(FILTER_IFM_NUM, 16) * FILTER_SIZE_X * FILTER_SIZE_Y * 16); + + uint input_start_idx = INPUT0_GET_INDEX(out_b, k_start, input_y, input_x); + + ACCUMULATOR_TYPE dotProd[OFM_BLOCKS_PER_SIMD][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH] = { }; + uint4 input_val[IN_BLOCK_HEIGHT][CEIL_DIV(IN_BLOCK_WIDTH, SIMD)]; + + __attribute__((opencl_unroll_hint(1))) + for (uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16) / FEATURE_SLM_SPLIT; k++) { + __attribute__((opencl_unroll_hint(1))) + for (uint fyn = 0; fyn < FILTER_SIZE_Y / FILTER_SIZE_Y_UNROLL; fyn++) { + // Load input block IN_BLOCK_HEIGHT x IN_BLOCK_WIDTH, scattering width along sub-group + __attribute__((opencl_unroll_hint)) + for (uint iyb = 0; iyb < IN_BLOCK_HEIGHT; ++iyb) { + __attribute__((opencl_unroll_hint)) + for (uint ixb = 0; ixb < CEIL_DIV(IN_BLOCK_WIDTH, SIMD); ++ixb) { + uint input_idx = input_start_idx + iyb * INPUT0_Y_PITCH * FSV + ixb * SIMD * FSV; + if (ixb != CEIL_DIV(IN_BLOCK_WIDTH, SIMD) - 1) { + input_val[iyb][ixb] = vload4(0, (__global uint *)(conv_input + input_idx + get_sub_group_local_id() * 16)); + } else { + input_val[iyb][ixb] = vload4(0, (__global uint*)(conv_input + input_idx + tmp * 16)); + } + } + } + + __attribute__((opencl_unroll_hint)) + for (uint fyu = 0; fyu < FILTER_SIZE_Y_UNROLL; ++fyu) { + __attribute__((opencl_unroll_hint(FILTER_SIZE_X))) + for (uint fx = 0; fx < FILTER_SIZE_X; fx++) { + + uint4 weights_val[OFM_BLOCKS_PER_SIMD]; + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) { + weights_val[ofb] = vload4(0, (__global uint *)(weights + filter_idx + ofb * filter_idx_diff)); + } + + __attribute__((opencl_unroll_hint)) + for (uint ive = 0; ive < 4; ive++) { + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) { + __attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT))) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) { + const uint ow_offset = ow + OUT_BLOCK_WIDTH; + const uint y_block_idx = oh * STRIDE_SIZE_Y + fyu * DILATION_SIZE_Y; + const uint x_block_idx = ow * STRIDE_SIZE_X + fx * DILATION_SIZE_X; + const uint shuffle_wi = x_block_idx % SIMD; + const uint shuffle_idx = x_block_idx / SIMD; + + dotProd[ofb][oh][ow] = TO_ACCUMULATOR_TYPE( + IMAD(dotProd[ofb][oh][ow], + AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val[y_block_idx][shuffle_idx][ive], shuffle_wi)), + AS_FILTER_TYPE_4(weights_val[ofb][ive]))); + } + } + } + } + + filter_idx += FSV * FSV; + } + } + input_start_idx += DILATION_SIZE_Y * INPUT0_Y_PITCH * FSV; + } + input_start_idx += INPUT0_FEATURE_PITCH * FSV * 
FEATURE_SLM_SPLIT - (FILTER_SIZE_Y / FILTER_SIZE_Y_UNROLL) * DILATION_SIZE_Y * INPUT0_Y_PITCH * FSV; + + filter_idx += FSV * FSV * FILTER_SIZE_X * FILTER_SIZE_Y * (FEATURE_SLM_SPLIT - 1); + } + +#if FEATURE_SLM_SPLIT != 1 + // Additional local memory reduction for feature split mode +# if FEATURE_SLM_SPLIT < OFM_BLOCKS_PER_SIMD +# error convolution_gpu_b_fs_yx_fsv16_imad.cl - OFM_BLOCKS_PER_SIMD must be less or equal to FEATURE_SLM_SPLIT +# endif + + const uint partial_acc_size = (FEATURE_SLM_SPLIT - 1) * OFM_SIZE_PER_SIMD * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH; + __local ACCUMULATOR_TYPE partial_acc[partial_acc_size]; + + uint sgid_start_idx = get_sub_group_id(); + sgid_start_idx = sgid_start_idx == 0 ? 0 : sgid_start_idx - 1; + __local ACCUMULATOR_TYPE* partial_acc_ptr = partial_acc + sgid_start_idx * OFM_SIZE_PER_SIMD * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH + + get_sub_group_local_id(); + + if (get_sub_group_id() < OFM_BLOCKS_PER_SIMD) { + __attribute__((opencl_unroll_hint)) + for (uint wg = 0; wg < OFM_BLOCKS_PER_SIMD; ++wg) { + if (get_sub_group_id() == wg) { + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < wg; ++ofb) { + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + const uint partial_acc_ptr_idx = + ofb * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH * SIMD + + oh * OUT_BLOCK_WIDTH * SIMD + + ow * SIMD; + partial_acc_ptr[partial_acc_ptr_idx] = dotProd[ofb][oh][ow]; + } + } + } + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + dotProd[0][oh][ow] = dotProd[wg][oh][ow]; + } + } + __attribute__((opencl_unroll_hint)) + for (uint ofb = wg + 1; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) { + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + const uint partial_acc_ptr_idx = + ((wg != 0) ? 
OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OFM_SIZE_PER_SIMD : 0) + + ofb * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH * SIMD + + oh * OUT_BLOCK_WIDTH * SIMD + + ow * SIMD; + partial_acc_ptr[partial_acc_ptr_idx] = dotProd[ofb][oh][ow]; + } + } + } + } + } + } else { + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < OFM_BLOCKS_PER_SIMD; ++ofb) { + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + const uint partial_acc_ptr_idx = + ofb * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH * SIMD + + oh * OUT_BLOCK_WIDTH * SIMD + + ow * SIMD; + partial_acc_ptr[partial_acc_ptr_idx] = dotProd[ofb][oh][ow]; + } + } + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_sub_group_id() >= OFM_BLOCKS_PER_SIMD) + return; + + partial_acc_ptr = partial_acc + get_sub_group_id() * OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * SIMD + get_sub_group_local_id(); + __attribute__((opencl_unroll_hint)) + for (uint wg = 0; wg < FEATURE_SLM_SPLIT - 1; ++wg) { + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + const uint partial_acc_ptr_idx = + wg * OFM_SIZE_PER_SIMD * OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH + + oh * OUT_BLOCK_WIDTH * SIMD + + ow * SIMD; + dotProd[0][oh][ow] += partial_acc_ptr[partial_acc_ptr_idx]; + } + } + } +#endif + +#if FEATURE_SLM_SPLIT == 1 +# define OFM_VALUES_PER_WI (OFM_BLOCKS_PER_SIMD) +#else +# define OFM_VALUES_PER_WI 1 + out_f += get_sub_group_id() * SIMD; + out_fg += get_sub_group_id() * SIMD; +#endif + +#if BIAS_TERM + BIAS_TYPE bias[OFM_VALUES_PER_WI]; + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) { + bias[ofb] = biases[out_f + ofb * SIMD]; + } +#endif + + ACTIVATION_TYPE dequantized[OFM_VALUES_PER_WI][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH]; + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) { + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + dequantized[ofb][oh][ow] = TO_ACTIVATION_TYPE(dotProd[ofb][oh][ow]); +#if BIAS_TERM + dequantized[ofb][oh][ow] += bias[ofb]; +#endif + } + } + } + + OUTPUT_TYPE result[OFM_VALUES_PER_WI][OUT_BLOCK_HEIGHT][OUT_BLOCK_WIDTH]; + __attribute__((opencl_unroll_hint)) + for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ++ofb) { +#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD_SCALAR + FUSED_OPS_PRELOAD_SCALAR; +#endif + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + __attribute__((opencl_unroll_hint)) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ++ow) { + ACTIVATION_TYPE dequantized_val = dequantized[ofb][oh][ow]; +#if HAS_FUSED_OPS +# if FUSED_OPS_CAN_USE_PRELOAD_SCALAR + FUSED_OPS_CALC_SCALAR; +# else + FUSED_OPS_SCALAR; +# endif + result[ofb][oh][ow] = FUSED_OPS_RESULT_SCALAR; +#else + result[ofb][oh][ow] = TO_OUTPUT_TYPE(dequantized_val); +#endif + } + } + } + + uint dst_index = OUTPUT_GET_INDEX(out_b, out_fg, out_y, out_x); + + if ((OUTPUT_SIZE_X % OUT_BLOCK_WIDTH == 0 || out_x + OUT_BLOCK_WIDTH <= OUTPUT_SIZE_X) + && (OUTPUT_FEATURE_NUM % OFM_BLOCKS_PER_SIMD == 0) ) { + __attribute__((opencl_unroll_hint(OFM_VALUES_PER_WI))) + for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ofb++) { + bool good_of_block = (CEIL_DIV(OUTPUT_FEATURE_NUM, SIMD) % OFM_BLOCKS_PER_SIMD == 0) || (out_fg + ofb * SIMD 
<= OUTPUT_FEATURE_NUM); + if (good_of_block) { + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + bool good_y = (OUTPUT_SIZE_Y % OUT_BLOCK_HEIGHT == 0) || (out_y + oh < OUTPUT_SIZE_Y); + if (good_y) { + uint ow = 0; + #if OUTPUT_TYPE_SIZE == 1 + __attribute__((opencl_unroll_hint)) + for (; ow + 8 <= OUT_BLOCK_WIDTH; ow += 8) { + MAKE_VECTOR_TYPE(OUTPUT_TYPE, 8) result_val; + __attribute__((opencl_unroll_hint)) + for (uint i = 0; i < 8; ++i) { + result_val[i] = result[ofb][oh][ow + i]; + } + DT_OUTPUT_BLOCK_WRITE8(output, dst_index, result_val); + dst_index += 8 * SIMD; + } + #endif + #if OUTPUT_TYPE_SIZE <= 2 + __attribute__((opencl_unroll_hint)) + for (; ow + 4 <= OUT_BLOCK_WIDTH; ow += 4) { + MAKE_VECTOR_TYPE(OUTPUT_TYPE, 4) result_val; + __attribute__((opencl_unroll_hint)) + for (uint i = 0; i < 4; ++i) { + result_val[i] = result[ofb][oh][ow + i]; + } + DT_OUTPUT_BLOCK_WRITE4(output, dst_index, result_val); + dst_index += 4 * SIMD; + } + #endif + + __attribute__((opencl_unroll_hint)) + for (; ow + 2 <= OUT_BLOCK_WIDTH; ow += 2) { + MAKE_VECTOR_TYPE(OUTPUT_TYPE, 2) result_val; + __attribute__((opencl_unroll_hint)) + for (uint i = 0; i < 2; ++i) { + result_val[i] = result[ofb][oh][ow + i]; + } + DT_OUTPUT_BLOCK_WRITE2(output, dst_index, result_val); + dst_index += 2 * SIMD; + } + + if (OUT_BLOCK_WIDTH % 2 == 1) { + OUTPUT_TYPE result_val = result[ofb][oh][ow]; + DT_OUTPUT_BLOCK_WRITE(output, dst_index, result_val); + dst_index += 1 * SIMD; + } + } // if (good_y) + dst_index += OUTPUT_Y_PITCH * FSV - OUT_BLOCK_WIDTH * FSV; + } // for (OUT_BLOCK_HEIGHT) + } // if (good_of_block) + dst_index += OUTPUT_FEATURE_PITCH * FSV - OUTPUT_Y_PITCH * FSV * OUT_BLOCK_HEIGHT; + } // for (OFM_VALUES_PER_WI) + } else { + __attribute__((opencl_unroll_hint(OFM_VALUES_PER_WI))) + for (uint ofb = 0; ofb < OFM_VALUES_PER_WI; ofb++) { + bool good_of_block = (CEIL_DIV(OUTPUT_FEATURE_NUM, SIMD) % OFM_BLOCKS_PER_SIMD == 0) || (out_fg + ofb * SIMD <= OUTPUT_FEATURE_NUM); + if (good_of_block) { + const uint dst_index = OUTPUT_GET_INDEX(out_b, out_f + ofb * SIMD, out_y, out_x); + __attribute__((opencl_unroll_hint)) + for (uint oh = 0; oh < OUT_BLOCK_HEIGHT; ++oh) { + bool good_y = (OUTPUT_SIZE_Y % OUT_BLOCK_HEIGHT == 0) || (out_y + oh < OUTPUT_SIZE_Y); + if (good_y) { + __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) + for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) { + +#if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0 + if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && ow >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH) + break; +#endif + +#if OUTPUT_FEATURE_NUM % SIMD != 0 + if (out_fg + (ofb + 1) * SIMD >= OUTPUT_FEATURE_NUM && get_sub_group_local_id() >= OUTPUT_FEATURE_NUM % SIMD) + result[ofb][oh][ow] = (OUTPUT_TYPE)0; +#endif + output[dst_index + ow * FSV + oh * OUTPUT_Y_PITCH * FSV] = result[ofb][oh][ow]; + } + } + } + } + } + } +} + +#undef AS_INPUT0_TYPE_4 +#undef AS_TYPE_N +#undef AS_TYPE_N_ +#undef AS_FILTER_TYPE_4 + +#undef CEIL_DIV +#undef ALIGN + +#undef SIMD +#undef FSV +#undef OFM_VALUES_PER_WI diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3.cl deleted file mode 100644 index 5915c849fe6..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3.cl +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (c) 2018-2019 Intel Corporation -// -// Licensed 
under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "include/common.cl" -#include "include/fetch.cl" -#include "include/imad.cl" -#include "include/mmad.cl" - -#if QUANTIZATION_TERM -#define ACCUMULATOR_TYPE int -#define TO_ACCUMULATOR_TYPE(x) convert_int(x) -#define ACTIVATION_TYPE float -#define TO_ACTIVATION_TYPE(x) convert_float(x) -#else -#define ACCUMULATOR_TYPE INPUT0_TYPE -#define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x) -#define ACTIVATION_TYPE INPUT0_TYPE -#define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x) -#endif - -#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size) -#define AS_TYPE_N_(type, n, x) as_##type##n(x) -#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x) -#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x) - -#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b)) -#define ALIGN(a, b) (CEIL_DIV(a, b) * (b)) - -// int8 conv_input and weights data is packed to int32 "batches", -// int/uint pointers here instead of INPUT0_TYPE/FILTER_TYPE for convenience -__attribute__((intel_reqd_sub_group_size(16))) -__attribute__((reqd_work_group_size(1, 1, 16))) -KERNEL(convolution_gpu_b_fs_yx_fsv16_imad_3x3)( - const __global INPUT0_TYPE *conv_input, - __global OUTPUT_TYPE *output, - const __global FILTER_TYPE *weights, -#if BIAS_TERM - const __global BIAS_TYPE *biases, -#endif -#if HAS_FUSED_OPS_DECLS - FUSED_OPS_DECLS, -#endif - uint split_idx) { - - #define LUT_VALUE_CLAMP(x) ((x) < (OUT_BLOCK_WIDTH - 1) * STRIDE_SIZE_X + FILTER_SIZE_X ? 
(x) : 0) - const int tmp[16] = { - LUT_VALUE_CLAMP(0), - LUT_VALUE_CLAMP(1), - LUT_VALUE_CLAMP(2), - LUT_VALUE_CLAMP(3), - LUT_VALUE_CLAMP(4), - LUT_VALUE_CLAMP(5), - LUT_VALUE_CLAMP(6), - LUT_VALUE_CLAMP(7), - LUT_VALUE_CLAMP(8), - LUT_VALUE_CLAMP(9), - LUT_VALUE_CLAMP(10), - LUT_VALUE_CLAMP(11), - LUT_VALUE_CLAMP(12), - LUT_VALUE_CLAMP(13), - LUT_VALUE_CLAMP(14), - LUT_VALUE_CLAMP(15) - }; - #undef LUT_VALUE_CLAMP - - const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH; - const uint out_y = get_global_id(1); - const uint out_b = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) / ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD); - const uint out_fg = (uint)(get_group_id(2) * OFM_SIZE_PER_SIMD) % ALIGN(OUTPUT_FEATURE_NUM, OFM_SIZE_PER_SIMD); - const uint out_f = out_fg + get_sub_group_local_id(); - ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * OFM_BLOCKS_PER_SIMD] = {0}; - const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X; - - const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y; - - uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, 0, 0, 0); -#if OFM_BLOCKS_PER_SIMD == 2 - uint filter_idx2 = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f + 16, 0, 0, 0); -#endif - - __attribute__((opencl_unroll_hint(1))) - for (uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16); k++) { - __attribute__((opencl_unroll_hint(1))) - for (uint j = 0; j < FILTER_SIZE_Y; j++) { - uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, k * 16, input_y + j, input_x + tmp[get_sub_group_local_id()]); - uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx)); - - __attribute__((opencl_unroll_hint(FILTER_SIZE_X))) - for (uint i = 0; i < FILTER_SIZE_X; i++) { - - uint4 weights_val = vload4(0, (__global uint *)(weights + filter_idx)); -#if OFM_BLOCKS_PER_SIMD == 2 - uint4 weights_val3 = vload4(0, (__global uint *)(weights + filter_idx2)); -#endif - - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for (uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) { - const uint ow_offset = ow + OUT_BLOCK_WIDTH; - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s0))); - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s1))); - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s2))); - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s3))); - -#if OFM_BLOCKS_PER_SIMD == 2 - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s0))); - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s1))); - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s2))); - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s3))); -#endif - } - filter_idx += 16 * 16; -#if OFM_BLOCKS_PER_SIMD == 2 - filter_idx2 += 16 * 16; 
-#endif - } - } - } - -#if BIAS_TERM - BIAS_TYPE bias[OFM_BLOCKS_PER_SIMD] = { biases[out_f] -#if OFM_BLOCKS_PER_SIMD == 2 - , biases[out_f + 16] -#endif - }; -#endif - __attribute__((opencl_unroll_hint(OFM_BLOCKS_PER_SIMD))) - for (uint j = 0; j < OFM_BLOCKS_PER_SIMD; j++) { - const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + j * 16, out_y, out_x); -#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD - FUSED_OPS_PRELOAD; -#endif - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) { - -#if OUTPUT_SIZE_X % OUT_BLOCK_WIDTH != 0 - if (out_x + OUT_BLOCK_WIDTH > OUTPUT_SIZE_X && i >= OUTPUT_SIZE_X % OUT_BLOCK_WIDTH) - break; -#endif - ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0; -#if BIAS_TERM - dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i] + bias[j]; -#else - dequantized = (ACTIVATION_TYPE)dotProd[OUT_BLOCK_WIDTH * j + i]; -#endif - OUTPUT_TYPE result; -#if HAS_FUSED_OPS - #if FUSED_OPS_CAN_USE_PRELOAD - FUSED_OPS_CALC; - #else - FUSED_OPS; - #endif - result = FUSED_OPS_RESULT; -#else - result = TO_OUTPUT_TYPE(dequantized); -#endif - -#if OUTPUT_FEATURE_NUM % 16 != 0 - if (out_fg + j * 16 + 16 > OUTPUT_FEATURE_NUM && get_sub_group_local_id() >= OUTPUT_FEATURE_NUM % 16) - result = (OUTPUT_TYPE)0; -#endif - output[dst_index + i * 16] = result; - } - } -} - -#undef AS_INPUT0_TYPE_4 -#undef AS_TYPE_N -#undef AS_TYPE_N_ -#undef MAKE_VECTOR_TYPE -#undef TO_ACTIVATION_TYPE -#undef ACTIVATION_TYPE -#undef TO_ACCUMULATOR_TYPE -#undef ACCUMULATOR_TYPE - -#undef CEIL_DIV -#undef ALIGN diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks.cl deleted file mode 100644 index df87ae04fd6..00000000000 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_b_fs_yx_fsv16_imad_3x3_ks.cl +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright (c) 2018-2019 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- - -#include "include/common.cl" -#include "include/fetch.cl" -#include "include/imad.cl" -#include "include/mmad.cl" - -#if QUANTIZATION_TERM - #define ACCUMULATOR_TYPE int - #define TO_ACCUMULATOR_TYPE(x) convert_int(x) - #define ACTIVATION_TYPE float - #define TO_ACTIVATION_TYPE(x) convert_float(x) -#else - #define ACCUMULATOR_TYPE INPUT0_TYPE - #define TO_ACCUMULATOR_TYPE(x) TO_INPUT0_TYPE(x) - #define ACTIVATION_TYPE INPUT0_TYPE - #define TO_ACTIVATION_TYPE(x) TO_INPUT0_TYPE(x) -#endif - -#define MAKE_VECTOR_TYPE(elem_type, size) CAT(elem_type, size) -#define AS_TYPE_N_(type, n, x) as_##type##n(x) -#define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x) -#define AS_INPUT0_TYPE_4(x) AS_TYPE_N(INPUT0_TYPE, 4, x) - -#define CEIL_DIV(a, b) (((a) + (b) - 1)/(b)) - -__attribute__((intel_reqd_sub_group_size(16))) -KERNEL(convolution_gpu_b_fs_yx_fsv16_3x3_ks)( - const __global INPUT0_TYPE *conv_input, - __global OUTPUT_TYPE *output, - const __global FILTER_TYPE *weights, -#if BIAS_TERM - const __global BIAS_TYPE *biases, -#endif -#if HAS_FUSED_OPS_DECLS - FUSED_OPS_DECLS, -#endif - uint split_idx) -{ -#if OUT_BLOCK_WIDTH == 7 && STRIDE_SIZE_X == 1 - const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0}; -#elif OUT_BLOCK_WIDTH == 7 && STRIDE_SIZE_X == 2 - const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0}; -#elif OUT_BLOCK_WIDTH == 8 && STRIDE_SIZE_X == 1 - const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0}; -#else // OUT_BLOCK_WIDTH == 8 && STRIDE_SIZE_X == 2 - const int tmp[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -#endif - - const uint out_x = (uint)get_global_id(0) * OUT_BLOCK_WIDTH; - const uint out_y = get_global_id(1); - const uint out_f = (uint)(get_group_id(2) * 32 + get_sub_group_local_id()); - const uint subgroup_id = get_sub_group_id(); - const uint subgroup_local_id = get_sub_group_local_id(); - const uint feature_offset = subgroup_id * INPUT0_FEATURE_NUM / 4; - const uint out_b = (uint)(get_group_id(2) * 32) / OUTPUT_FEATURE_NUM; - - ACCUMULATOR_TYPE dotProd[OUT_BLOCK_WIDTH * 2] = { 0 }; - const int input_x = out_x * STRIDE_SIZE_X - PADDING_SIZE_X; - const int input_y = out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y; - - uint filter_idx = GET_FILTER_OS_IS_YX_OSV16_ISV16_INDEX(FILTER, out_f, feature_offset, 0, 0); - uint diff_filter_idx = 16*3*3*FILTER_IFM_NUM; - - __attribute__((opencl_unroll_hint(1))) - for(uint k = 0; k < CEIL_DIV(INPUT0_FEATURE_NUM, 16)/4; k++ ) { - __attribute__((opencl_unroll_hint(1))) - for(uint j = 0; j < FILTER_SIZE_Y; j++) { - uint input_idx = GET_DATA_B_FS_YX_FSV16_INDEX(INPUT0, out_b, feature_offset + k * 16, input_y + j, input_x + tmp[subgroup_local_id]); - uint4 input_val0 = vload4(0, (__global uint *)(conv_input + input_idx)); - - __attribute__((opencl_unroll_hint(FILTER_SIZE_X))) - for(uint i = 0; i < FILTER_SIZE_X; i++) { - - uint4 weights_val = vload4(0, (__global uint*)(weights + filter_idx)); - uint4 weights_val3 = vload4(0, (__global uint *)(weights + filter_idx + diff_filter_idx)); - - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for(uint ow = 0; ow < OUT_BLOCK_WIDTH; ow++) { - const uint ow_offset = ow + OUT_BLOCK_WIDTH; - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s0))); - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s1))); - dotProd[ow] = 
TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s2))); - dotProd[ow] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val.s3))); - - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s0, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s0))); - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s1, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s1))); - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s2, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s2))); - dotProd[ow_offset] = TO_ACCUMULATOR_TYPE(IMAD(dotProd[ow_offset], AS_INPUT0_TYPE_4(intel_sub_group_shuffle(input_val0.s3, ow * STRIDE_SIZE_X + i)), as_char4(weights_val3.s3))); - } - filter_idx += 16 * 16; - } - } - } - - //k slicing summing up with SLM - __local ACCUMULATOR_TYPE partial_acc[16 * OUT_BLOCK_WIDTH * 6]; - if(subgroup_id == 0) - { - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for(uint i = 0; i < OUT_BLOCK_WIDTH; i++) - { - partial_acc[16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH]; - } - } - else if(subgroup_id == 1) - { - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for(uint i = 0; i < OUT_BLOCK_WIDTH; i++) - { - partial_acc[i * 16 + subgroup_local_id] = dotProd[i]; - dotProd[i] = dotProd[i + OUT_BLOCK_WIDTH]; - } - } - else if (subgroup_id == 2) - { - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) - { - partial_acc[2 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i]; - partial_acc[3 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH]; - - } - } - else if (subgroup_id == 3) - { - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) - { - partial_acc[4 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i]; - partial_acc[5 * 16 * OUT_BLOCK_WIDTH + i * 16 + subgroup_local_id] = dotProd[i + OUT_BLOCK_WIDTH]; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - if (subgroup_id < 2) { - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) - { - dotProd[i] += partial_acc[(i + subgroup_id * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id]; - dotProd[i] += partial_acc[(i + (subgroup_id + 2) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id]; - dotProd[i] += partial_acc[(i + (subgroup_id + 4) * OUT_BLOCK_WIDTH) * 16 + subgroup_local_id]; - } -#if BIAS_TERM - BIAS_TYPE bias = biases[out_f + get_sub_group_id() * 16]; -#endif - -#if HAS_FUSED_OPS && FUSED_OPS_CAN_USE_PRELOAD - FUSED_OPS_PRELOAD; -#endif - const uint dst_index = GET_DATA_B_FS_YX_FSV16_INDEX(OUTPUT, out_b, out_f + subgroup_id * 16, out_y, out_x); - __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH))) - for (uint i = 0; i < OUT_BLOCK_WIDTH; i++) - { - ACTIVATION_TYPE dequantized = (ACTIVATION_TYPE)0; -#if BIAS_TERM - dequantized = (ACTIVATION_TYPE)dotProd[i] + bias; -#else - dequantized = (ACTIVATION_TYPE)dotProd[i]; -#endif -#if HAS_FUSED_OPS - #if FUSED_OPS_CAN_USE_PRELOAD - FUSED_OPS_CALC; - #else - FUSED_OPS; - #endif - output[dst_index + i * 16] = FUSED_OPS_RESULT; -#else - output[dst_index + i * 16] = TO_OUTPUT_TYPE(dequantized); -#endif - } - } -} - 
-#undef AS_INPUT0_TYPE_4 -#undef AS_TYPE_N -#undef AS_TYPE_N_ -#undef MAKE_VECTOR_TYPE -#undef TO_ACTIVATION_TYPE -#undef ACTIVATION_TYPE -#undef TO_ACCUMULATOR_TYPE -#undef ACCUMULATOR_TYPE - -#undef CEIL_DIV diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl index 4867898bafb..c334425d3ca 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/mvn_gpu_b_fs_yx_fsv16_imad.cl @@ -58,10 +58,11 @@ // If required analogously the mvn_var_1 and mvn_var_2 kernels should be enqueud, additionally providing results from // mvn_mean_2 kernel. // -// Finally the mvn_final kernel should be enqueued with provided buffers with outputs from previous kernels (mvn_mean_2, mvn_var_2). -// To enable parallel mode PRECALC_MEAN and optionally PRECALC_VARIANCE definitions should be used. -// As at this stage there is no further need to synchronize and this kernel will perform simple normalization given known mean and inverse of variance. -// Due to this this kernel can be enqueued with full paralellization, not limiting it to single work-group. +// Finally the mvn_final kernel should be enqueued with the buffers holding the outputs from the previous kernels +// (mvn_mean_2, mvn_var_2). To enable parallel mode the PRECALC_MEAN and optionally PRECALC_VARIANCE definitions should be +// used. As at this stage there is no further need to synchronize, this kernel will perform simple normalization +// given the known mean and inverse of variance. Due to this, the kernel can be enqueued with full parallelization, not +// limiting it to a single work-group. // lws: SIMD x 1 x 1 // gws: (x * y) / SIMD * SIMD x feature x batch // @@ -73,7 +74,6 @@ // In parallel mode this must be equal to LWS * ITEM_GROUPS, except in mvn_final kernel where it has no restrictions. // ITEM_GROUPS - Number of work-groups performing accumulation in parallel mode. Should be the same in both stages of parallel kernels.
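The comment block above describes the host-side contract of the parallel (multi-stage) path: mvn_mean_1 and mvn_mean_2 are enqueued first, mvn_var_1 and mvn_var_2 optionally follow, and mvn_final is enqueued last with PRECALC_MEAN (and optionally PRECALC_VARIANCE) defined. The sketch below only restates the work-size rules from that comment as a small standalone C++ program; the shape and tuning values (SIMD, LWS, ITEM_GROUPS) are illustrative assumptions, and the program does not call the actual clDNN dispatch code.

// A hypothetical, standalone illustration of the dispatch sizes described in the
// comment above; it is not the clDNN host code and only prints an enqueue plan.
#include <cstddef>
#include <cstdio>

int main() {
    // Assumed example values: SIMD width, work-group size, the number of
    // accumulating work-groups (ITEM_GROUPS), and an example data set.
    const std::size_t SIMD = 16, LWS = 256, ITEM_GROUPS = 16;
    const std::size_t x = 56, y = 56, z = 1, f = 64, b = 1;
    const std::size_t spatial = x * y * z;  // ITEMS_NUM includes Z after this change
    const bool normalize_variance = true;

    // Accumulating stages: in parallel mode GWS must equal LWS * ITEM_GROUPS.
    const std::size_t acc_gws = LWS * ITEM_GROUPS;
    std::printf("mvn_mean_1: gws = {%zu, %zu, %zu}, lws = {%zu, 1, 1}\n", acc_gws, f, b, LWS);
    std::printf("mvn_mean_2: reduces the %zu partial results written by mvn_mean_1\n", ITEM_GROUPS);
    if (normalize_variance) {
        std::printf("mvn_var_1:  gws = {%zu, %zu, %zu}, lws = {%zu, 1, 1}\n", acc_gws, f, b, LWS);
        std::printf("mvn_var_2:  reduces the %zu partial results written by mvn_var_1\n", ITEM_GROUPS);
    }

    // Final stage: compiled with PRECALC_MEAN (and PRECALC_VARIANCE when variance
    // is normalized), so nothing is left to synchronize and its global size is not
    // restricted; the comment gives spatial / SIMD * SIMD x feature x batch.
    std::printf("mvn_final:  gws = {%zu, %zu, %zu}, lws = {%zu, 1, 1}\n",
                spatial / SIMD * SIMD, f, b, SIMD);
    return 0;
}

With the values assumed here, each accumulating stage runs 256 * 16 work-items per data set, while the final stage covers the full 56x56 plane (3136 work-items, already a multiple of 16) per feature and batch.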
- #define FSV 16 #define INPUT_SLICE_PITCH 16 #define SG_NUM (LWS / SIMD) @@ -88,7 +88,7 @@ #define TO_MEAN_PACKED_TYPE CAT(convert_, MEAN_PACKED_TYPE) -#define ITEMS_NUM (OUTPUT_SIZE_X * OUTPUT_SIZE_Y) +#define ITEMS_NUM (OUTPUT_SIZE_X * OUTPUT_SIZE_Y * OUTPUT_SIZE_Z) #define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) @@ -115,8 +115,11 @@ KERNEL(mvn_mean_1)(const __global INPUT0_TYPE* input, const uint sgid = get_sub_group_id(); const uint sglid = get_sub_group_local_id(); +#if INPUT0_DIMS == 5 + const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0); +#else // INPUT0_DIMS == 4 const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0); - +#endif INT_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_input)(input, data_sets_offset, get_global_id(0)); @@ -198,8 +201,11 @@ KERNEL(mvn_var_1)(const __global INPUT0_TYPE* input, const uint sgid = get_sub_group_id(); const uint sglid = get_sub_group_local_id(); +#if INPUT0_DIMS == 5 + const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0); +#else // INPUT0_DIMS == 4 const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0); - +#endif MEAN_TYPE mean = means[flat_data_set_group * FSV + sglid]; MEAN_PACKED_TYPE partial_sum = FUNC_CALL(accumulate_sum_sq_dev)(input, data_sets_offset, get_global_id(0), mean); @@ -312,7 +318,11 @@ KERNEL(mvn_final)( const uint sgid = get_sub_group_id() + items_group * SG_NUM; const uint sglid = get_sub_group_local_id(); +#if INPUT0_DIMS == 5 + const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0, 0); +#else // INPUT0_DIMS == 4 const uint data_sets_offset = INPUT0_GET_INDEX(b, f, 0, 0); +#endif uint input_offset; #if (!PRECALC_MEAN || (NORMALIZE_VARIANCE && !PRECALC_VARIANCE)) && SG_NUM != 1 @@ -348,7 +358,11 @@ KERNEL(mvn_final)( #if OUTPUT_IS_FP input_offset = data_sets_offset + sgid * SIMD * FSV; uint output_spatial_base = sgid * SIMD; +#if OUTPUT_DIMS == 5 + uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0, 0) + sgid * SIMD * FSV; +#else // OUTPUT_DIMS == 4 uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0) + sgid * SIMD * FSV; +#endif // For fused ops to align with non-fp path const uint set_idx = sglid; @@ -360,18 +374,25 @@ KERNEL(mvn_final)( uint output_spatial = output_spatial_base + si; MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance; OUTPUT_TYPE result; -#if HAS_FUSED_OPS +# if HAS_FUSED_OPS FUSED_OPS; result = FUSED_OPS_RESULT; -#else +# else result = TO_OUTPUT_TYPE(normalized); -#endif +# endif #if !OUTPUT_PAD_IN_ITEMS DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result); #else +# if OUTPUT_DIMS == 5 + uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; + uint x = output_spatial % OUTPUT_SIZE_X; + output_offset = OUTPUT_GET_INDEX(b, f, z, y, x); +# else // OUTPUT_DIMS == 4 uint x = output_spatial % OUTPUT_SIZE_X; uint y = output_spatial / OUTPUT_SIZE_X; output_offset = OUTPUT_GET_INDEX(b, f, y, x); +# endif DT_OUTPUT_BLOCK_WRITE(output, output_offset, result); #endif } @@ -396,24 +417,29 @@ KERNEL(mvn_final)( uint output_spatial = output_spatial_base + si; MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance; OUTPUT_TYPE result; -#if HAS_FUSED_OPS +# if HAS_FUSED_OPS FUSED_OPS; result = FUSED_OPS_RESULT; -#else +# else result = TO_OUTPUT_TYPE(normalized); -#endif +# endif #if !OUTPUT_PAD_IN_ITEMS DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result); #else +# if OUTPUT_DIMS == 5 + uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint y = 
(output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; + uint x = output_spatial % OUTPUT_SIZE_X; + output_offset = OUTPUT_GET_INDEX(b, f, z, y, x); +# else // OUTPUT_DIMS == 4 uint x = output_spatial % OUTPUT_SIZE_X; uint y = output_spatial / OUTPUT_SIZE_X; output_offset = OUTPUT_GET_INDEX(b, f, y, x); +# endif DT_OUTPUT_BLOCK_WRITE(output, output_offset, result); #endif } - } else if (lws_uniform_leftovers > 0 && - sg_uniform_leftovers > 0 && - sgid == lws_uniform_leftovers_full_simds) { + } else if (lws_uniform_leftovers > 0 && sg_uniform_leftovers > 0 && sgid == lws_uniform_leftovers_full_simds) { // TODO: May be worth to consider the data here as across sub-group // Rest of leftovers, still use whole sub-group, but change addresses to not load extra data. INPUT_PACKED_TYPE in_pack; @@ -454,25 +480,36 @@ KERNEL(mvn_final)( uint output_spatial = output_spatial_base + si; MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[si]) - mean) * inv_variance; OUTPUT_TYPE result; -#if HAS_FUSED_OPS - FUSED_OPS; - result = FUSED_OPS_RESULT; -#else - result = TO_OUTPUT_TYPE(normalized); -#endif +# if HAS_FUSED_OPS + FUSED_OPS; + result = FUSED_OPS_RESULT; +# else + result = TO_OUTPUT_TYPE(normalized); +# endif #if !OUTPUT_PAD_IN_ITEMS DT_OUTPUT_BLOCK_WRITE(output, output_offset + si * SIMD, result); #else +# if OUTPUT_DIMS == 5 + uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; + uint x = output_spatial % OUTPUT_SIZE_X; + output_offset = OUTPUT_GET_INDEX(b, f, z, y, x); +# else // OUTPUT_DIMS == 4 uint x = output_spatial % OUTPUT_SIZE_X; uint y = output_spatial / OUTPUT_SIZE_X; output_offset = OUTPUT_GET_INDEX(b, f, y, x); +# endif DT_OUTPUT_BLOCK_WRITE(output, output_offset, result); #endif } } -#else // => !OUTPUT_IS_FP +#else // => !OUTPUT_IS_FP input_offset = data_sets_offset + sgid * SIMD * FSV; +#if OUTPUT_DIMS == 5 + uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0, 0) + sgid * SIMD * FSV; +#else // OUTPUT_DIMS == 4 uint output_offset = OUTPUT_GET_INDEX(b, f, 0, 0) + sgid * SIMD * FSV; +#endif uint output_spatial = sgid * SIMD + sglid; for (uint spatial_idx = 0; spatial_idx < ITEMS_NUM / GWS; ++spatial_idx) { @@ -482,19 +519,26 @@ KERNEL(mvn_final)( __attribute__((opencl_unroll_hint)) for (uint set_idx = 0; set_idx < FSV; ++set_idx) { MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx); - #if HAS_FUSED_OPS +# if HAS_FUSED_OPS FUSED_OPS; result[set_idx] = FUSED_OPS_RESULT; - #else +# else result[set_idx] = TO_OUTPUT_TYPE(normalized); - #endif +# endif } #if !OUTPUT_PAD_IN_ITEMS ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result; #else +# if OUTPUT_DIMS == 5 + uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; + uint x = output_spatial % OUTPUT_SIZE_X; + output_offset = OUTPUT_GET_INDEX(b, f, z, y, x); +# else // OUTPUT_DIMS == 4 uint x = output_spatial % OUTPUT_SIZE_X; uint y = output_spatial / OUTPUT_SIZE_X; output_offset = OUTPUT_GET_INDEX(b, f, y, x); +# endif ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result; #endif @@ -518,24 +562,29 @@ KERNEL(mvn_final)( __attribute__((opencl_unroll_hint)) for (uint set_idx = 0; set_idx < FSV; ++set_idx) { MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx); - #if HAS_FUSED_OPS +# if HAS_FUSED_OPS 
FUSED_OPS; result[set_idx] = FUSED_OPS_RESULT; - #else +# else result[set_idx] = TO_OUTPUT_TYPE(normalized); - #endif +# endif } #if !OUTPUT_PAD_IN_ITEMS ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result; #else +# if OUTPUT_DIMS == 5 + uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; + uint x = output_spatial % OUTPUT_SIZE_X; + output_offset = OUTPUT_GET_INDEX(b, f, z, y, x); +# else // OUTPUT_DIMS == 4 uint x = output_spatial % OUTPUT_SIZE_X; uint y = output_spatial / OUTPUT_SIZE_X; output_offset = OUTPUT_GET_INDEX(b, f, y, x); +# endif ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result; #endif - } else if (lws_uniform_leftovers > 0 && - sg_uniform_leftovers > 0 && - sgid == lws_uniform_leftovers_full_simds) { + } else if (lws_uniform_leftovers > 0 && sg_uniform_leftovers > 0 && sgid == lws_uniform_leftovers_full_simds) { // TODO: May be worth to consider the data here as across sub-group // Rest of leftovers, still use whole sub-group, but change addresses to not load extra data. INPUT_PACKED_TYPE in_pack = ((const __global INPUT_PACKED_TYPE*)(input + input_offset))[sglid % sg_uniform_leftovers]; @@ -544,20 +593,27 @@ KERNEL(mvn_final)( __attribute__((opencl_unroll_hint)) for (uint set_idx = 0; set_idx < FSV; ++set_idx) { MEAN_TYPE normalized = (TO_MEAN_TYPE(in_pack[set_idx]) - intel_sub_group_shuffle(mean, set_idx)) * intel_sub_group_shuffle(inv_variance, set_idx); - #if HAS_FUSED_OPS +# if HAS_FUSED_OPS FUSED_OPS; result[set_idx] = FUSED_OPS_RESULT; - #else +# else result[set_idx] = TO_OUTPUT_TYPE(normalized); - #endif +# endif } if (sglid < sg_uniform_leftovers) { #if !OUTPUT_PAD_IN_ITEMS ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[sglid] = result; #else +# if OUTPUT_DIMS == 5 + uint z = output_spatial / (OUTPUT_SIZE_X * OUTPUT_SIZE_Y); + uint y = (output_spatial / OUTPUT_SIZE_X) % OUTPUT_SIZE_Y; + uint x = output_spatial % OUTPUT_SIZE_X; + output_offset = OUTPUT_GET_INDEX(b, f, z, y, x); +# else // OUTPUT_DIMS == 4 uint x = output_spatial % OUTPUT_SIZE_X; uint y = output_spatial / OUTPUT_SIZE_X; output_offset = OUTPUT_GET_INDEX(b, f, y, x); +# endif ((__global OUTPUT_PACKED_TYPE*)(output + output_offset))[0] = result; #endif } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp index 6775ba48c96..0137dc261c5 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp @@ -111,6 +111,7 @@ attach_activation_gpu::attach_activation_gpu() { { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw }, { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw }, { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw }, + { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw }, { std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw }, { std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw }, { std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), val_fw }, diff --git a/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp index 46610452f9b..429201bb7a9 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp +++ 
b/inference-engine/thirdparty/clDNN/src/gpu/mvn_gpu.cpp @@ -88,6 +88,10 @@ attach_mvn_gpu::attach_mvn_gpu() { mvn_gpu::create); implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), mvn_gpu::create); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), + mvn_gpu::create); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), + mvn_gpu::create); implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), mvn_gpu::create); implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp index 6425e7ec81c..1e1ff1e7299 100644 --- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp +++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp @@ -280,6 +280,10 @@ void prepare_buffer_fusing::run(program_impl& p) { if (usr_layout.format == format::b_fs_yx_fsv16 && (opt_lower_pad % 16 != 0 || opt_upper_pad % 16 != 0)) return; + if (input_layout.data_padding.lower_size().batch[0] != 0 || input_layout.data_padding.upper_size().batch[0] != 0 || + input_layout.data_padding.lower_size().spatial[0] != 0 || input_layout.data_padding.upper_size().spatial[0] != 0 || + input_layout.data_padding.lower_size().spatial[1] != 0 || input_layout.data_padding.upper_size().spatial[1] != 0) + return; } if (format == format::bfyx && crop_size.batch[0] == input_layout.size.batch[0] && diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp index 02d5dfc4649..71fc7e4ee23 100644 --- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp +++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp @@ -154,6 +154,14 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, auto next_output_layout = next.get_output_layout(); auto prev_dt = prev.get_output_layout().data_type; + auto is_input_idx = [&](size_t idx) -> bool { + if (&next.get_dependency(idx) == &prev) + return true; + if (next.get_dependency(idx).is_type() && &next.get_dependency(idx).get_dependency(0) == &prev) + return true; + return false; + }; + if (next.is_type()) return true; @@ -190,6 +198,9 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, if (next.is_type() && fmt_prev == format::b_fs_yx_fsv4 && fmt_next == format::byxf_af32 && next.as().get_groups() == 1) return true; + if (next.is_type() && fmt_prev == format::b_fs_yx_fsv16 && fmt_next == format::b_fs_yx_fsv4 && is_input_idx(0)) + return true; + if (next.is_type() && fmt_prev == format::bfyx && prev.is_input() && prev_dt == data_types::u8) return true; diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp index f623585f43b..1098cecbe61 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp @@ -8318,6 +8318,8 @@ INSTANTIATE_TEST_CASE_P( .all_test_params(format::b_fs_yx_fsv16) .add(convolution_random_test_all_params{ 1, 89, 3, { 1, 1 }, { 3, 3 }, { 1, 1 }, { -1, -1 }, { 1, 1 }, 
true, 1, format::b_fs_yx_fsv4, false, false, false }) + .add(convolution_random_test_all_params{ + 1, 16, 32, { 3, 3 }, { 17, 17 }, { 1, 1 }, { -8, -8 }, { 1, 1 }, true, 1, format::b_fs_yx_fsv16, false, false, true }) ), to_string_convolution_all_params ); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp index 50ab7f31397..5ad4ed4dca3 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp @@ -20,6 +20,7 @@ #include #include "api/crop.hpp" #include +#include #include #include #include @@ -587,6 +588,46 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { EXPECT_EQ(output_ptr_2[i], out2[i]); } +TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { + const auto& engine = get_test_engine(); + + auto batch_num = 1; + auto feature_num = 4; + auto x_size = 1; + auto y_size = 1; + + auto crop_batch_num = 1; + auto crop_feature_num_1 = 3; + auto crop_x_size = 1; + auto crop_y_size = 1; + auto feature_offset_1 = 0; + auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + + padding in_pad({0, 0, 1, 1}, {0, 0, 1, 1}); + auto padded_layout = input.get_layout().with_padding(in_pad); + topology topology; + topology.add(input_layout("input", input.get_layout())); + topology.add(reorder("input_reorder", "input", padded_layout)); + topology.add(crop("crop1", "input_reorder", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) })); + topology.add(reorder("out_reorder", "crop1", format::bfyx, data_types::f32)); + + std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; + std::vector out1 = { -1.f, 2.f,-3.f }; + set_values(input, input_vec); + build_options bo; + bo.set_option(build_option::optimize_data(true)); + + network network(engine, topology, bo); + network.set_input_data("input", input); + auto outputs = network.execute(); + + auto output = outputs.at("out_reorder").get_memory(); + auto output_ptr = output.pointer(); + + for (size_t i = 0; i < out1.size();i++) + EXPECT_EQ(output_ptr[i], out1[i]); +} + TEST(crop_gpu, basic_int_in1x4x1x1_split) { // Tests split with crop implementation // _CROP_1(1x3x1x1,offset(0x0x0x0)) diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp index 35c9ba8e2ae..793f93b0d54 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp @@ -31,16 +31,15 @@ using namespace cldnn; -class mvn_gpu_test : public ::testing::TestWithParam -{ -}; +class mvn_gpu_test : public ::testing::TestWithParam {}; template -void mvn_compute_mean_accross_channels(cldnn::memory &output, bool normalize_variance) { +void mvn_compute_mean_accross_channels(cldnn::memory& output, bool normalize_variance) { auto output_size = output.get_layout().size; uint32_t batch_size = output_size.batch[0]; uint32_t feature_size = output_size.feature[0]; + uint32_t z_size = output_size.spatial[2]; uint32_t y_size = output_size.spatial[1]; uint32_t x_size = output_size.spatial[0]; @@ -48,32 +47,29 @@ void mvn_compute_mean_accross_channels(cldnn::memory &output, bool normalize_var float err_margin = output.get_layout().data_type == data_types::f32 ? 
1e-03F : 1e-02F; - for (uint32_t b = 0; b < batch_size; ++b) - { + for (uint32_t b = 0; b < batch_size; ++b) { float sum = 0.f; float variance = 0.f; - for (uint32_t f = 0; f < feature_size; ++f) - { - for (uint32_t y = 0; y < y_size; ++y) - { - for (uint32_t x = 0; x < x_size; ++x) - { - auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, 0, 0)); - size_t data_index = output.get_layout().get_linear_offset(index_tensor); - float data = static_cast(buff[data_index]); - sum += data; - if (normalize_variance) - variance += data*data; + for (uint32_t f = 0; f < feature_size; ++f) { + for (uint32_t z = 0; z < z_size; z++) { + for (uint32_t y = 0; y < y_size; ++y) { + for (uint32_t x = 0; x < x_size; ++x) { + auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, z, 0)); + size_t data_index = output.get_layout().get_linear_offset(index_tensor); + float data = static_cast(buff[data_index]); + sum += data; + if (normalize_variance) + variance += data * data; + } } } } - sum /= feature_size * y_size * x_size; + sum /= feature_size * y_size * x_size * z_size; T result_sum = static_cast(sum); EXPECT_NEAR(result_sum, 0.f, err_margin) << "at b=" << b; - if (normalize_variance) - { - variance /= feature_size * y_size * x_size; + if (normalize_variance) { + variance /= feature_size * y_size * x_size * z_size; T result_variance = static_cast(variance); EXPECT_NEAR(result_variance, 1.f, err_margin) << " at b=" << b; } @@ -81,11 +77,12 @@ void mvn_compute_mean_accross_channels(cldnn::memory &output, bool normalize_var } template -void mvn_compute_mean_within_channels(cldnn::memory &output, bool normalize_variance) { +void mvn_compute_mean_within_channels(cldnn::memory& output, bool normalize_variance) { auto output_size = output.get_layout().size; uint32_t batch_size = output_size.batch[0]; uint32_t feature_size = output_size.feature[0]; + uint32_t z_size = output_size.spatial[2]; uint32_t y_size = output_size.spatial[1]; uint32_t x_size = output_size.spatial[0]; @@ -93,31 +90,28 @@ void mvn_compute_mean_within_channels(cldnn::memory &output, bool normalize_vari float err_margin = output.get_layout().data_type == data_types::f32 ? 
1e-03F : 1e-02F; - for (uint32_t b = 0; b < batch_size; ++b) - { - for (uint32_t f = 0; f < feature_size; ++f) - { + for (uint32_t b = 0; b < batch_size; ++b) { + for (uint32_t f = 0; f < feature_size; ++f) { float sum = 0.f; float variance = 0.f; - for (uint32_t y = 0; y < y_size; ++y) - { - for (uint32_t x = 0; x < x_size; ++x) - { - auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, 0, 0)); - size_t data_index = output.get_layout().get_linear_offset(index_tensor); - float data = static_cast(buff[data_index]); - sum += data; - if (normalize_variance) - variance += data*data; + for (uint32_t z = 0; z < z_size; ++z) { + for (uint32_t y = 0; y < y_size; ++y) { + for (uint32_t x = 0; x < x_size; ++x) { + auto index_tensor = tensor(batch(b), feature(f), spatial(x, y, z, 0)); + size_t data_index = output.get_layout().get_linear_offset(index_tensor); + float data = static_cast(buff[data_index]); + sum += data; + if (normalize_variance) + variance += data * data; + } } } - sum /= y_size * x_size; + sum /= y_size * x_size * z_size; T result_sum = static_cast(sum); EXPECT_NEAR(result_sum, 0.f, err_margin) << "at b=" << b << ", f=" << f; - if (normalize_variance) - { - variance /= y_size * x_size; + if (normalize_variance) { + variance /= y_size * x_size * z_size; T result_variance = static_cast(variance); EXPECT_NEAR(result_variance, 1.f, err_margin) << " at b=" << b << ", f=" << f; } @@ -125,15 +119,14 @@ void mvn_compute_mean_within_channels(cldnn::memory &output, bool normalize_vari } } -TEST(mvn_gpu_test, mvn_test_across_channels_bfyx) -{ - //mvn accross channels fp32 test with normalize_variance set to false +TEST(mvn_gpu_test, mvn_test_across_channels_bfyx) { + // mvn accross channels fp32 test with normalize_variance set to false using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -153,15 +146,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx) mvn_compute_mean_accross_channels(output, false); } -TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16) -{ - //mvn accross channels fp16 test with normalize_variance set to false +TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16) { + // mvn accross channels fp16 test with normalize_variance set to false using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -181,15 +173,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16) mvn_compute_mean_accross_channels(output, false); } -TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance) -{ - //mvn accross channels fp32 test with normalize_variance set to true +TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance) { + // mvn accross channels fp32 test with normalize_variance set to true using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -209,15 +200,14 @@ 
TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance) mvn_compute_mean_accross_channels(output, true); } -TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16) -{ - //mvn accross channels fp16 test with normalize_variance set to true +TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16) { + // mvn accross channels fp16 test with normalize_variance set to true using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -237,15 +227,14 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16) mvn_compute_mean_accross_channels(output, true); } -TEST(mvn_gpu_test, mvn_test_within_channels_bfyx) -{ - //mvn within channels fp32 test with normalize_variance set to false +TEST(mvn_gpu_test, mvn_test_within_channels_bfyx) { + // mvn within channels fp32 test with normalize_variance set to false using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -265,15 +254,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx) mvn_compute_mean_within_channels(output, false); } -TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16) -{ - //mvn within channels fp16 test with normalize_variance set to false +TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16) { + // mvn within channels fp16 test with normalize_variance set to false using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -293,15 +281,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16) mvn_compute_mean_within_channels(output, false); } -TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance) -{ - //mvn within channels fp32 test with normalize_variance set to true +TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance) { + // mvn within channels fp32 test with normalize_variance set to true using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -321,15 +308,14 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance) mvn_compute_mean_within_channels(output, true); } -TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16) -{ - //mvn within channels fp16 test with normalize_variance set to true +TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16) { + // mvn within channels fp16 test with normalize_variance set to true using namespace cldnn; using namespace tests; const auto& engine = get_test_engine(); - auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } }); + auto input = memory::allocate(engine, 
{data_types::f16, format::bfyx, {7, 10, 17, 13}}); tests::set_random_values(input, true, 8, 100); @@ -360,16 +346,18 @@ struct mvn_basic_test_params { struct mvn_random_test : ::testing::TestWithParam { template - void fill_data(memory& mem, const tests::VVVVF& data) { + void fill_data(memory& mem, const tests::VVVVVF& data) { auto size = mem.get_layout().size; auto ptr = mem.pointer(); for (size_t bi = 0; bi < static_cast(size.batch[0]); ++bi) { for (size_t fi = 0; fi < static_cast(size.feature[0]); ++fi) { - for (size_t yi = 0; yi < static_cast(size.spatial[1]); ++yi) { - for (size_t xi = 0; xi < static_cast(size.spatial[0]); ++xi) { - auto tensor_addr = tensor(batch(bi), feature(fi), spatial(xi, yi, 0, 0)); - auto offset = mem.get_layout().get_linear_offset(tensor_addr); - ptr[offset] = data[bi][fi][xi][yi]; + for (size_t zi = 0; zi < static_cast(size.spatial[2]); ++zi) { + for (size_t yi = 0; yi < static_cast(size.spatial[1]); ++yi) { + for (size_t xi = 0; xi < static_cast(size.spatial[0]); ++xi) { + auto tensor_addr = tensor(batch(bi), feature(fi), spatial(xi, yi, zi, 0)); + auto offset = mem.get_layout().get_linear_offset(tensor_addr); + ptr[offset] = data[bi][fi][xi][yi][zi]; + } } } } @@ -379,7 +367,14 @@ struct mvn_random_test : ::testing::TestWithParam { template void fill_random_data(memory& mem, int min, int max, int k = 8) { auto size = mem.get_layout().size; - auto input_data = tests::generate_random_4d(size.batch[0], size.feature[0], size.spatial[0], size.spatial[1], min, max, k); + auto input_data = tests::generate_random_5d(size.batch[0], + size.feature[0], + size.spatial[0], + size.spatial[1], + size.spatial[2], + min, + max, + k); fill_data(mem, input_data); } @@ -403,23 +398,23 @@ struct mvn_random_test : ::testing::TestWithParam { auto& size = params.input_size; auto& output_pad = params.output_pad; - auto input = memory::allocate(eng, { params.input_type, params.input_format, size }); + auto input = memory::allocate(eng, {params.input_type, params.input_format, size}); switch (params.input_type) { - case data_types::f32: - fill_random_data(input, -127, 127); - break; - case data_types::f16: - fill_random_data(input, -127, 127); - break; - case data_types::i8: - fill_random_data(input, -127, 127); - break; - case data_types::u8: - fill_random_data(input, -127, 127); - break; - default: - break; + case data_types::f32: + fill_random_data(input, -127, 127); + break; + case data_types::f16: + fill_random_data(input, -127, 127); + break; + case data_types::i8: + fill_random_data(input, -127, 127); + break; + case data_types::u8: + fill_random_data(input, -127, 127); + break; + default: + break; } topology topo; @@ -453,23 +448,31 @@ struct mvn_test_case_generator : std::vector { } mvn_test_case_generator& smoke_tests(format::type fmt, data_types in_dt) { - push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, false, false, padding() }); - push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, false, true, padding() }); - push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, true, false, padding() }); - push_back(mvn_basic_test_params{ fmt, in_dt, {7, 10, 17, 13}, true, true, padding() }); + push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, false, false, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, false, true, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, true, false, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {7, 10, 17, 13}, true, true, padding()}); + return 
*this; + } + + mvn_test_case_generator& zyx_tests(format::type fmt, data_types in_dt) { + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 1, 67, 71}, false, false, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 1, 67, 71}, false, true, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 5, 67, 71}, false, false, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 5, 67, 71}, false, true, padding()}); return *this; } mvn_test_case_generator& extended_tests(format::type fmt, data_types in_dt) { - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, false, padding() }); - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, true, padding() }); - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, false, padding() }); - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, true, padding() }); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, false, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, true, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, false, padding()}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, true, padding()}); // output padding - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, false, padding({0, 0, 1, 1}) }); - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, false, true, padding({0, 0, 1, 1}) }); - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, false, padding({0, 0, 1, 1}) }); - push_back(mvn_basic_test_params{ fmt, in_dt, {2, 17, 67, 71}, true, true, padding({0, 0, 1, 1}) }); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, false, padding({0, 0, 1, 1})}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, false, true, padding({0, 0, 1, 1})}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, false, padding({0, 0, 1, 1})}); + push_back(mvn_basic_test_params{fmt, in_dt, {2, 17, 67, 71}, true, true, padding({0, 0, 1, 1})}); return *this; } @@ -477,16 +480,18 @@ struct mvn_test_case_generator : std::vector { INSTANTIATE_TEST_CASE_P(smoke, mvn_random_test, - testing::ValuesIn( - mvn_test_case_generator() - .smoke_tests(format::b_fs_yx_fsv16, data_types::i8) - .smoke_tests(format::b_fs_yx_fsv16, data_types::u8) - ), ); + testing::ValuesIn(mvn_test_case_generator() + .smoke_tests(format::b_fs_yx_fsv16, data_types::i8) + .smoke_tests(format::b_fs_yx_fsv16, data_types::u8)), ); + +INSTANTIATE_TEST_CASE_P(zyx, + mvn_random_test, + testing::ValuesIn(mvn_test_case_generator() + .zyx_tests(format::b_fs_zyx_fsv16, data_types::i8) + .zyx_tests(format::b_fs_zyx_fsv16, data_types::u8)), ); INSTANTIATE_TEST_CASE_P(extended, mvn_random_test, - testing::ValuesIn( - mvn_test_case_generator() - .extended_tests(format::b_fs_yx_fsv16, data_types::i8) - .extended_tests(format::b_fs_yx_fsv16, data_types::u8) - ), ); + testing::ValuesIn(mvn_test_case_generator() + .extended_tests(format::b_fs_yx_fsv16, data_types::i8) + .extended_tests(format::b_fs_yx_fsv16, data_types::u8)), ); diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index 1093f242dec..9097834a586 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 1093f242dec18e9d45c60b14370e24431384ea54 +Subproject commit 9097834a5860fcf2ccbbd856e1e111bf0124c2de diff --git 
a/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt b/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt index 9b997a0aa2b..02352ca1fd4 100644 --- a/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt +++ b/inference-engine/thirdparty/movidius/mvnc/CMakeLists.txt @@ -16,10 +16,10 @@ add_library(${TARGET_NAME} STATIC ${MVNC_SOURCES}) target_include_directories(${TARGET_NAME} PUBLIC "include" + ${WATCHDOG_INCLUDE} PRIVATE ${XLINK_INCLUDE} - ${XLINK_PLATFORM_INCLUDE} - ${WATCHDOG_INCLUDE}) + ${XLINK_PLATFORM_INCLUDE}) target_compile_definitions(${TARGET_NAME} PRIVATE diff --git a/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h b/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h index cb791eecb01..a3f6d2d3f11 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h +++ b/inference-engine/thirdparty/movidius/mvnc/include/mvnc.h @@ -10,6 +10,8 @@ extern "C" { #endif +#include "watchdog/watchdog.h" + #define NC_THERMAL_BUFFER_SIZE 100 #define NC_DEBUG_BUFFER_SIZE 120 #define NC_MAX_DEVICES (32) @@ -159,6 +161,12 @@ struct ncDeviceDescr_t { char name[NC_MAX_NAME_SIZE]; }; +typedef struct ncDeviceOpenParams { + WatchdogHndl_t* watchdogHndl; + int watchdogInterval; + const char* customFirmwareDirectory; +} ncDeviceOpenParams_t; + typedef enum { NC_FIFO_HOST_RO = 0, // fifo can be read through the API but can not be // written ( graphs can read and write data ) @@ -201,7 +209,7 @@ MVNC_EXPORT_API ncStatus_t ncSetDeviceConnectTimeout(int deviceConnectTimeoutSec * If NULL or empty, default path searching behavior will be used. */ MVNC_EXPORT_API ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, - struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory); + struct ncDeviceDescr_t in_ncDeviceDesc, ncDeviceOpenParams_t deviceOpenParams); /** * @brief Returns a description of all available devices in the system @@ -215,7 +223,7 @@ MVNC_EXPORT_API ncStatus_t ncAvailableDevices(struct ncDeviceDescr_t *deviceDesc /** * @brief Close device and destroy handler */ -MVNC_EXPORT_API ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandle); +MVNC_EXPORT_API ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandle, WatchdogHndl_t* watchdogHndl); // Graph MVNC_EXPORT_API ncStatus_t ncGraphCreate(const char* name, struct ncGraphHandle_t **graphHandle); diff --git a/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h b/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h index ef9ce2ee621..e539788e676 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h +++ b/inference-engine/thirdparty/movidius/mvnc/include/ncPrivateTypes.h @@ -54,7 +54,7 @@ struct _devicePrivate_t { deviceCapabilities_t dev_attr; ncDeviceState_t state; uint32_t device_id; - wd_context watchdog_ctx; + WdDeviceHndl_t* watchdog_device; int wd_interval; }; diff --git a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h index 596d43f01e6..09c3d7dde80 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h +++ b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdog.h @@ -5,48 +5,42 @@ #ifndef MVNC_WATCHDOG_H #define MVNC_WATCHDOG_H -#include #ifdef __cplusplus -# define WD_API extern "C" -# else -# define WD_API +extern "C" +{ #endif -/** -* @brief default ping interval is 1 second -*/ -#define WATCHDOG_PING_INTERVAL_MS 1000 +typedef struct 
_WatchdogHndl_t WatchdogHndl_t; -typedef struct wd_context_tag { - void * opaque; -} wd_context; +typedef struct _WdDeviceHndl_t { + void* m_device; +} WdDeviceHndl_t; typedef enum { WD_ERRNO = 0, WD_NOTINITIALIZED, - WD_DUPLICATE, WD_FAIL } wd_error_t; -/** - * @brief initializes watchdog context, required to be called before any other WD API calls - * @return - */ -WD_API wd_error_t watchdog_init_context(wd_context *ctx); +wd_error_t watchdog_create(WatchdogHndl_t** out_watchdogHndl); +void watchdog_destroy(WatchdogHndl_t* watchdogHndl); /** * @brief Creates watchdog thread, if not created, and registers new watchee device, and initialise opaque handle to it. * To avoid a memory leak, the registered device must be unregister with watchdog_unregister_device(). - * @param d - newly connected device descriptor + * @param deviceHandle - newly connected device descriptor * @return */ -WD_API wd_error_t watchdog_register_device(wd_context *ctx, devicePrivate_t *d); +wd_error_t watchdog_register_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle); /** * @brief remove watch_dog device from the list, and might stop watchdog worker thread * @return result of operation */ -WD_API wd_error_t watchdog_unregister_device(wd_context *ctx); +wd_error_t watchdog_unregister_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle); +#ifdef __cplusplus +} +#endif #endif // MVNC_WATCHDOG_H diff --git a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp index 0c5e91be5ad..99b516fc477 100644 --- a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp +++ b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/watchdogPrivate.hpp @@ -5,6 +5,17 @@ #pragma once #include +#include +#include +#include +#include + +#define MVLOG_UNIT_NAME watchdog +#include "XLinkLog.h" + +#if defined(_WIN32) +#include "win_synchapi.h" +#endif // defined(_WIN32) namespace Watchdog { @@ -12,15 +23,11 @@ namespace Watchdog { * @brief represents watchdog device interface to be registered within watchdog worker */ class IDevice { - public: +public: using time_point = std::chrono::steady_clock::time_point; virtual ~IDevice() = default; - /** - * @brief depending on implementation watchdog device shouldn't have interval longer than that - */ - virtual void setInterval(const std::chrono::milliseconds msInterval) noexcept = 0; /** * @brief watchdog request device to keep alive with current timestamp */ @@ -39,4 +46,47 @@ class IDevice { virtual void *getHandle() const noexcept = 0; }; +class AutoScope { +public: + explicit AutoScope(const std::function& func) : _func(func) {} + ~AutoScope() { _func(); } + + AutoScope(const AutoScope&) = delete; + AutoScope(AutoScope&&) = delete; + AutoScope& operator=(const AutoScope&) = delete; + AutoScope& operator=(AutoScope&&) = delete; +private: + std::function _func; +}; + +class CustomUniqueLock { +public: + explicit CustomUniqueLock(pthread_mutex_t* mutex) + :m_mutex(mutex) { + if(m_mutex == nullptr) { + throw std::runtime_error("mutex should not be null"); + } + + int rc = pthread_mutex_lock(m_mutex); + if (rc != 0) { + throw std::runtime_error(std::string("failed to lock mutex. rc: ") + strerror(rc)); + } + }; + + ~CustomUniqueLock() { + int rc = pthread_mutex_unlock(m_mutex); + if (rc != 0) { + mvLog(MVLOG_ERROR, "failed to unlock mutex. 
rc: %s", strerror(rc)); + } + } + + CustomUniqueLock(const CustomUniqueLock&) = delete; + CustomUniqueLock(const CustomUniqueLock&&) = delete; + CustomUniqueLock& operator=(const CustomUniqueLock&) = delete; + CustomUniqueLock& operator=(const CustomUniqueLock&&) = delete; + +private: + pthread_mutex_t* m_mutex = nullptr; +}; + } // namespace Watchdog diff --git a/inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h new file mode 100644 index 00000000000..bff0b59b4a0 --- /dev/null +++ b/inference-engine/thirdparty/movidius/mvnc/include/watchdog/xlink_device.h @@ -0,0 +1,25 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef MVNC_XLINK_DEVICE_H +#define MVNC_XLINK_DEVICE_H + +#include "mvnc.h" +#include "watchdog.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define WATCHDOG_MAX_PING_INTERVAL_MS 1000 + +wd_error_t xlink_device_create(WdDeviceHndl_t** out_deviceHandle, devicePrivate_t* pDevice); +void xlink_device_destroy(WdDeviceHndl_t* deviceHandle); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c b/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c index 4001024dea5..39007833881 100644 --- a/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c +++ b/inference-engine/thirdparty/movidius/mvnc/src/mvnc_api.c @@ -36,6 +36,7 @@ #include "XLinkMacros.h" #include "XLinkStringUtils.h" #include "watchdog.h" +#include "xlink_device.h" #define THERMAL_BUFFER_SIZE 100 #define THERMAL_THROTTLING_BUFFER_SIZE (THERMAL_BUFFER_SIZE + sizeof(int)) @@ -660,7 +661,7 @@ ncStatus_t ncSetDeviceConnectTimeout(int deviceConnectTimeoutSec) { } ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, - struct ncDeviceDescr_t in_ncDeviceDesc, int watchdogInterval, const char* customFirmwareDirectory) { + struct ncDeviceDescr_t in_ncDeviceDesc, ncDeviceOpenParams_t deviceOpenParams) { //---------------------------------------------------------- // Check input @@ -669,7 +670,11 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, deviceDesc_t in_deviceDesc = {0}; copyNcDeviceDescrToXLink(&in_ncDeviceDesc, &in_deviceDesc); + int watchdogInterval = deviceOpenParams.watchdogInterval; + const char* customFirmwareDirectory = deviceOpenParams.customFirmwareDirectory; + CHECK_HANDLE_CORRECT_RC(deviceHandlePtr, NC_INVALID_PARAMETERS); + CHECK_HANDLE_CORRECT_RC(deviceOpenParams.watchdogHndl, NC_INVALID_PARAMETERS); if (watchdogInterval < 0) { mvLog(MVLOG_ERROR, "Invalid watchdogInterval"); return NC_INVALID_PARAMETERS; @@ -1094,8 +1099,12 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, d->device_mon_stream_id = deviceMonitorStreamId; #if !(defined(NO_BOOT)) - watchdog_init_context(&d->watchdog_ctx); - watchdog_register_device(&d->watchdog_ctx, d); + wd_error_t wd_rc = xlink_device_create(&d->watchdog_device, d); + if (wd_rc) { + mvLog(MVLOG_WARN, "watchdog is not started for device %p", d->xlink); + } else { + watchdog_register_device(deviceOpenParams.watchdogHndl, d->watchdog_device); + } #endif getDevAttributes(d); @@ -1110,7 +1119,10 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, CHECK_STREAM_ID(graphMonitorStreamId, { printfOverXLinkClose(d); // TODO NO_BOOT case - watchdog_unregister_device(&d->watchdog_ctx); + if (d->watchdog_device != NULL) { + watchdog_unregister_device(deviceOpenParams.watchdogHndl, d->watchdog_device); + 
xlink_device_destroy(d->watchdog_device); + } CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m)); @@ -1124,7 +1136,10 @@ ncStatus_t ncDeviceOpen(struct ncDeviceHandle_t **deviceHandlePtr, #else CHECK_STREAM_ID(graphMonitorStreamId, { // TODO NO_BOOT case - watchdog_unregister_device(&d->watchdog_ctx); + if (d->watchdog_device != NULL) { + watchdog_unregister_device(deviceOpenParams.watchdogHndl, d->watchdog_device); + xlink_device_destroy(d->watchdog_device); + } CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_data_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->dev_stream_m)); CHECK_MUTEX_SUCCESS(pthread_mutex_destroy(&d->graph_stream_m)); @@ -1654,7 +1669,7 @@ static ncStatus_t destroyDeviceHandle(struct ncDeviceHandle_t **deviceHandlePtr) } -ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) { +ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr, WatchdogHndl_t* watchdogHndl) { int found = 0; XLinkError_t rc = X_LINK_SUCCESS; @@ -1732,7 +1747,10 @@ ncStatus_t ncDeviceClose(struct ncDeviceHandle_t **deviceHandlePtr) { #endif #if !defined(NO_BOOT) - watchdog_unregister_device(&d->watchdog_ctx); + if (d->watchdog_device != NULL) { + watchdog_unregister_device(watchdogHndl, d->watchdog_device); + xlink_device_destroy(d->watchdog_device); + } #endif // Save all devices before reset diff --git a/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp index 35e0316b2a2..dda87de61ac 100644 --- a/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp +++ b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/watchdog.cpp @@ -2,32 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "watchdog.h" +#include "watchdogPrivate.hpp" + #include -#include #include #include #include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include #define MVLOG_UNIT_NAME watchdog #include "XLinkLog.h" -#include "XLink.h" -#include "XLinkPrivateDefines.h" -#include "XLinkErrorUtils.h" - -#if defined(_WIN32) -#include "win_synchapi.h" -#endif // defined(_WIN32) namespace { @@ -35,556 +23,403 @@ using namespace std; using namespace chrono; using namespace Watchdog; -/** - * @brief implementation of watchdog device using xlink representation of it - */ -class XLinkDevice : public IDevice { - _devicePrivate_t privateDevice; - using time_point = std::chrono::steady_clock::time_point; - time_point lastPongTime = time_point::min(); - time_point lastPingTime = time_point::min(); - enum : int { deviceHangTimeout = 12000}; - -public: - explicit XLinkDevice(devicePrivate_t *pDevice) - : privateDevice(*pDevice) { - setInterval(milliseconds(privateDevice.wd_interval)); - } - - void setInterval(const std::chrono::milliseconds msInterval) noexcept override { - privateDevice.wd_interval = std::max(static_cast(msInterval.count()), WATCHDOG_PING_INTERVAL_MS); - } - - void keepAlive(const time_point ¤t_time) noexcept override { - bool bPong = sendPingMessage(); - // we consider that as first pong time even if it wasn't happen as beginning of boot - if (lastPongTime == time_point::min()) { - lastPongTime = current_time; - } - - lastPingTime = current_time; - - int diff = duration_cast(current_time - lastPongTime).count(); - - if (bPong) { - lastPongTime = current_time; - 
mvLog(MVLOG_INFO, "[%p] device, ping succeed after %d ms\n", privateDevice.xlink, diff); - } else { - mvLog(MVLOG_WARN, "[%p] device, no response for %d ms\n", privateDevice.xlink, diff); - } - } - - milliseconds dueIn(const time_point ¤t_time) const noexcept override { - if (lastPingTime == time_point::min()) - return milliseconds::zero(); - - // overdue - if (current_time - lastPingTime > std::chrono::milliseconds(privateDevice.wd_interval)) { - return milliseconds::zero(); - } - - return duration_cast(lastPingTime + std::chrono::milliseconds(privateDevice.wd_interval) - current_time); - } - - /** - * @brief means device is hanging - */ - bool isTimeout() const noexcept override { - if (lastPongTime > lastPingTime) return false; - if (lastPingTime - lastPongTime > milliseconds(deviceHangTimeout)) { - // cleaning xlink connection - allowing abort all semaphores waiting in other threads - XLinkResetAll(); - return true; - } - return false; - } - - /** - * @brief gets some opaque handle that clearly destinguesh one device previate_t from another - */ - void *getHandle() const noexcept override { - return privateDevice.xlink; - } - -private: - bool sendPingMessage() { - XLinkError_t rc = X_LINK_SUCCESS; - XLINK_RET_ERR_IF(pthread_mutex_lock(&privateDevice.dev_stream_m), false); - - deviceCommand_t config = {}; - config.type = DEVICE_WATCHDOG_PING; - - // xlink ping acknowledge interval shouldn't be more then expected ping interval - rc = XLinkWriteDataWithTimeout(privateDevice.device_mon_stream_id, (const uint8_t*)&config, sizeof(config), deviceHangTimeout); - - if(pthread_mutex_unlock(&privateDevice.dev_stream_m) != 0) { - mvLog(MVLOG_ERROR, "Failed to unlock privateDevice.dev_stream_m"); - } - - if (rc != X_LINK_SUCCESS) { - mvLog(MVLOG_ERROR, "Failed send ping message: %s", XLinkErrorToStr(rc)); - return false; - } - return true; - } -}; - /** * @brief when device just added into watchdog, it should not be due interval at all */ class NoDueOnFirstCall : public IDevice { - std::shared_ptr original; - bool bFirstCall = false; - public: - NoDueOnFirstCall(const std::shared_ptr & original) : original(original) {} - void setInterval(const std::chrono::milliseconds msInterval) noexcept override { - original->setInterval(msInterval); +public: + NoDueOnFirstCall(IDevice* original) : m_originalPtr(original) {} + + void keepAlive(const time_point& current_time) noexcept override { + m_originalPtr->keepAlive(current_time); + m_firstCall = true; } - void keepAlive(const time_point ¤t_time) noexcept override { - original->keepAlive(current_time); - bFirstCall = true; - } - std::chrono::milliseconds dueIn(const time_point ¤t_time) const noexcept override { - if (!bFirstCall) { + + milliseconds dueIn(const time_point& current_time) const noexcept override { + if (!m_firstCall) { return milliseconds::zero(); } - return original->dueIn(current_time); + + return m_originalPtr->dueIn(current_time); } + bool isTimeout() const noexcept override { - return original->isTimeout(); - } - void *getHandle() const noexcept override { - return original->getHandle(); - } -}; - -class CustomUniqueLock { -public: - explicit CustomUniqueLock(pthread_mutex_t* mutex) - :m_mutex(mutex) { - if(m_mutex == nullptr) { - throw std::runtime_error("mutex should not be null"); - } - - int rc = pthread_mutex_lock(m_mutex); - if (rc != 0) { - throw std::runtime_error(std::string("failed to lock mutex. 
rc: ") + strerror(rc)); - } - }; - - ~CustomUniqueLock() { - int rc = pthread_mutex_unlock(m_mutex); - if (rc != 0) { - mvLog(MVLOG_ERROR, "failed to unlock mutex. rc: %s", strerror(rc)); - } + return m_originalPtr->isTimeout(); } - CustomUniqueLock(const CustomUniqueLock&) = delete; - CustomUniqueLock(const CustomUniqueLock&&) = delete; - CustomUniqueLock& operator=(const CustomUniqueLock&) = delete; - CustomUniqueLock& operator=(const CustomUniqueLock&&) = delete; + void* getHandle() const noexcept override { + return m_originalPtr->getHandle(); + } private: - pthread_mutex_t* m_mutex = nullptr; -}; - -static void * WD_OPAQUE_MAGIC = reinterpret_cast(0xdeadbeaf); - -struct wd_context_opaque { - void * magic = WD_OPAQUE_MAGIC; - IDevice * actual = nullptr; - bool destroyed = false; - void *handleCached = nullptr; + IDevice* m_originalPtr; + bool m_firstCall = false; }; class WatchdogImpl { - using wd_context_as_tuple = std::tuple, bool*, void*>; +public: + WatchdogImpl(); + ~WatchdogImpl(); - using Devices = std::list; - Devices watchedDevices; - std::atomic_bool threadRunning {false}; - - pthread_mutex_t routineLock; - pthread_cond_t wakeUpPingThread; - std::thread poolThread; + bool registerDevice(IDevice* device); + bool removeDevice(IDevice* device); WatchdogImpl(const WatchdogImpl&) = delete; WatchdogImpl(WatchdogImpl&&) = delete; WatchdogImpl& operator = (const WatchdogImpl&) = delete; WatchdogImpl& operator = (WatchdogImpl&&) = delete; - class AutoScope { - public: - explicit AutoScope(const std::function& func) : _func(func) {} - ~AutoScope() { _func(); } - - AutoScope(const AutoScope&) = delete; - AutoScope& operator=(const AutoScope&) = delete; - private: - std::function _func; - }; +private: + void waitFor(const milliseconds sleepInterval); + void watchdogRoutine() noexcept; private: + using Devices = std::vector>; + using DevicesMap = std::unordered_map>; - WatchdogImpl() { - int rc = pthread_mutex_init(&routineLock, NULL); - if (rc != 0) { - throw std::runtime_error("failed to initialize \"routineLock\" mutex. rc: " + std::to_string(rc)); - } + Devices watchedDevices; + DevicesMap removedDevices; + std::atomic_bool threadRunning {false}; + + pthread_mutex_t routineLock; + pthread_cond_t wakeUpPingThread; + std::thread poolThread; +}; + +//------------- Watchdog implementation ------------- + +WatchdogImpl::WatchdogImpl() { + int rc = pthread_mutex_init(&routineLock, NULL); + if (rc != 0) { + throw std::runtime_error("failed to initialize \"routineLock\" mutex. rc: " + std::to_string(rc)); + } #if !(defined(__APPLE__) || defined(_WIN32)) - pthread_condattr_t attr; - rc = pthread_condattr_init(&attr); - if (rc != 0) { - throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc)); - } - AutoScope attrDestroy([&attr]{ - if (pthread_condattr_destroy(&attr) != 0) - mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute."); - }); + pthread_condattr_t attr; + rc = pthread_condattr_init(&attr); + if (rc != 0) { + throw std::runtime_error("failed to initialize condition variable attribute. rc: " + std::to_string(rc)); + } - rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); - if (rc != 0) { - throw std::runtime_error("failed to set condition variable clock. 
rc: " + std::to_string(rc)); - } + AutoScope attrDestroy([&attr]{ + if (pthread_condattr_destroy(&attr) != 0) + mvLog(MVLOG_ERROR, "Failed to destroy condition variable attribute."); + }); + + rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + if (rc != 0) { + throw std::runtime_error("failed to set condition variable clock. rc: " + std::to_string(rc)); + } #endif // !(defined(__APPLE__) || defined(_WIN32)) - rc = pthread_cond_init(&wakeUpPingThread, NULL); - if (rc != 0) { - throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. rc: " + std::to_string(rc)); + rc = pthread_cond_init(&wakeUpPingThread, NULL); + if (rc != 0) { + throw std::runtime_error("failed to initialize \"wakeUpPingThread\" condition variable. rc: " + std::to_string(rc)); + } +} + +WatchdogImpl::~WatchdogImpl() { + mvLog(MVLOG_INFO, "watchdog terminated\n"); + try + { + CustomUniqueLock lock {&routineLock}; + for (auto &item : watchedDevices) { + mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", item->getHandle()); } + } catch (const std::exception & ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) { + mvLog(MVLOG_ERROR, "unknown error"); } -public: - - static WatchdogImpl &instance() { - static WatchdogImpl watchdog; - return watchdog; + threadRunning = false; + int rc = pthread_cond_broadcast(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); } + if (poolThread.joinable()) { + poolThread.join(); + } - ~WatchdogImpl() { - mvLog(MVLOG_INFO, "watchdog terminated\n"); - try - { - CustomUniqueLock lock {&routineLock}; - for (auto &item : watchedDevices) { - *std::get<1>(item) = true; - mvLog(MVLOG_WARN, "[%p] device, stop watching due to watchdog termination\n", std::get<2>(item)); - } - } catch (const std::exception & ex) { - mvLog(MVLOG_ERROR, "error %s", ex.what()); - } catch (...) { - mvLog(MVLOG_ERROR, "unknown error"); - } + rc = pthread_mutex_destroy(&routineLock); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". rc=%d", rc); + } - threadRunning = false; - int rc = pthread_cond_broadcast(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); - } + rc = pthread_cond_destroy(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". rc=%d", rc); + } +} - rc = pthread_mutex_destroy(&routineLock); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to destroy the \"routineLock\". rc=%d", rc); - } +bool WatchdogImpl::registerDevice(IDevice* device) { + mvLog(MVLOG_INFO, "register device: %p\n", &device); - rc = pthread_cond_destroy(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to destroy the \"wakeUpPingThread\". 
rc=%d", rc); - } + CustomUniqueLock lock {&routineLock}; + if (!threadRunning) { if (poolThread.joinable()) { poolThread.join(); } - } + threadRunning = true; -public: - void *register_device(std::shared_ptr device) { - CustomUniqueLock lock {&routineLock}; - std::unique_ptr ctx (new wd_context_opaque); - - // rare case of exact pointer address collision - if (ctx.get() == WD_OPAQUE_MAGIC) { - std::unique_ptr ctx2(new wd_context_opaque); - ctx.reset(ctx2.release()); - } - - if (!threadRunning) { - if (poolThread.joinable()) { - poolThread.join(); - } - threadRunning = true; - - poolThread = std::thread([this]() { - if (pthread_setname_np( + poolThread = std::thread([this]() { + if (pthread_setname_np( #ifndef __APPLE__ - pthread_self(), + pthread_self(), #endif - "WatchdogThread") != 0) { - perror("Setting name for watchdog thread failed"); - } - watchdog_routine(); - }); - } else { - // wake up thread - int rc = pthread_cond_broadcast(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); + "WatchdogThread") != 0) { + perror("Setting name for watchdog thread failed"); } - } - - ctx->handleCached = device->getHandle(); - watchedDevices.emplace_back(device, &ctx->destroyed, ctx->handleCached); - - ctx->actual = std::get<0>(watchedDevices.back()).get(); - - return ctx.release(); + watchdogRoutine(); + }); } - void *register_device(devicePrivate_t *device) { - return register_device(std::make_shared(std::make_shared(device))); + auto it = std::find_if(std::begin(watchedDevices), + std::end(watchedDevices), + [&device](const std::shared_ptr& item) { + return item->getHandle() == device->getHandle(); + }); + + bool found = it != std::end(watchedDevices); + if (!found) { + watchedDevices.emplace_back(std::make_shared(device)); } - bool remove_device(void *opaque) { - mvLog(MVLOG_INFO, "remove_device : %p\n", opaque); - auto ptr = reinterpret_cast(opaque); - if (ptr == nullptr) { - return false; - } - - bool bFound = false; - { - CustomUniqueLock lock {&routineLock}; - - // thread already removed - if (ptr->destroyed) { - delete ptr; - return true; - } - - auto idx = std::find_if(std::begin(watchedDevices), - std::end(watchedDevices), - [ptr](const wd_context_as_tuple &item) { - return std::get<0>(item)->getHandle() == ptr->actual->getHandle(); - }); - bFound = idx != std::end(watchedDevices); - if(bFound) { - watchedDevices.erase(idx); - delete ptr; - } - } - - // wake up thread since we might select removed device as nex to be ping, and there is no more devices available - int rc = pthread_cond_broadcast(&wakeUpPingThread); - if (rc != 0) { - mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); - } - - return bFound; + int rc = pthread_cond_broadcast(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); } - private: - /// @note: We are using here pthread_cond_timedwait as a replacement for condition_variable::wait_for, - /// as libstdc++ has bug not using monotonic clock. When GCC 10.x became minimum supported version, - /// that code could be removed. 
- void wait_for(const milliseconds sleepInterval) { - struct timespec timeToWait = {0, 0}; + return !found; +} - const auto sec = std::chrono::duration_cast(sleepInterval); +bool WatchdogImpl::removeDevice(IDevice* device) { + mvLog(MVLOG_INFO, "remove device: %p\n", &device); + + CustomUniqueLock lock {&routineLock}; + + auto it = std::find_if(std::begin(watchedDevices), + std::end(watchedDevices), + [&device](const std::shared_ptr& item) { + return item->getHandle() == device->getHandle(); + }); + + bool removed = it != std::end(watchedDevices); + if (removed) { + watchedDevices.erase(it); + } else if (removedDevices.count(device->getHandle())) { + removedDevices.erase(device->getHandle()); + removed = true; + } + + // wake up thread since we might select removed device as nex to be ping, and there is no more devices available + int rc = pthread_cond_broadcast(&wakeUpPingThread); + if (rc != 0) { + mvLog(MVLOG_WARN, "failed to unblock threads blocked on the \"wakeUpPingThread\". rc=%d", rc); + } + + return removed; +} + +void WatchdogImpl::waitFor(const milliseconds sleepInterval) { + struct timespec timeToWait = {0, 0}; + + const auto sec = std::chrono::duration_cast(sleepInterval); #if (defined(__APPLE__) || defined(_WIN32)) - timeToWait.tv_sec = sec.count(); - timeToWait.tv_nsec = - std::chrono::duration_cast(sleepInterval).count() - - std::chrono::nanoseconds(sec).count(); + timeToWait.tv_sec = sec.count(); + timeToWait.tv_nsec = + std::chrono::duration_cast(sleepInterval).count() - + std::chrono::nanoseconds(sec).count(); #else - clock_gettime(CLOCK_MONOTONIC, &timeToWait); - const auto secondInNanoSeconds = 1000000000L; - const auto nsecSum = std::chrono::duration_cast(sleepInterval).count() - - std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec; - timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds; - timeToWait.tv_nsec = nsecSum % secondInNanoSeconds; + clock_gettime(CLOCK_MONOTONIC, &timeToWait); + const auto secondInNanoSeconds = 1000000000L; + const auto nsecSum = std::chrono::duration_cast(sleepInterval).count() - + std::chrono::nanoseconds(sec).count() + timeToWait.tv_nsec; + timeToWait.tv_sec += sec.count() + nsecSum / secondInNanoSeconds; + timeToWait.tv_nsec = nsecSum % secondInNanoSeconds; #endif // (defined(__APPLE__) || defined(_WIN32)) #if defined(__APPLE__) - const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait); + const auto rc = pthread_cond_timedwait_relative_np(&wakeUpPingThread, &routineLock, &timeToWait); #else - const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait); + const auto rc = pthread_cond_timedwait(&wakeUpPingThread, &routineLock, &timeToWait); #endif // defined(__APPLE__) - if (rc != 0 && rc != ETIMEDOUT) { - throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. rc: " + std::to_string(rc)); - } + + if (rc != 0 && rc != ETIMEDOUT) { + throw std::runtime_error("Failed to perform wait in a loop for " + std::to_string(sleepInterval.count()) + " ms. 
rc: " + std::to_string(rc)); + } +} + +void WatchdogImpl::watchdogRoutine() noexcept { + try { + mvLog(MVLOG_INFO, "thread started\n"); + + milliseconds sleepInterval; + CustomUniqueLock lock{&routineLock}; + + do { + for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end();) { + auto &device = *deviceIt; + auto isReady = device->dueIn(steady_clock::now()).count() <= 0; + if (isReady) { + auto now = steady_clock::now(); + device->keepAlive(steady_clock::now()); + mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n", + duration_cast(steady_clock::now() - now).count()); + } + if (device->isTimeout()) { + mvLog(MVLOG_ERROR, "[%p] device, not respond, removing from watchdog\n", device->getHandle()); + // marking device as deleted, to prevent double resource free from wd_unregister_device + removedDevices[device->getHandle()] = device; + deviceIt = watchedDevices.erase(deviceIt); + } else { + ++deviceIt; + } + } + auto currentTime = steady_clock::now(); + auto minInterval = std::min_element(watchedDevices.begin(), watchedDevices.end(), + [¤tTime](const Devices::value_type& device1, + const Devices::value_type& device2) { + return device1->dueIn(currentTime).count() < + device2->dueIn(currentTime).count(); + }); + // if for some reason we have empty devices list but watchdog is active + if (minInterval == watchedDevices.end()) { + mvLog(MVLOG_INFO, "no active devices to watch, stopping Watchdog thread\n"); + threadRunning = false; + break; + } + + sleepInterval = (*minInterval)->dueIn(currentTime); + if (sleepInterval.count() <= 0) { + continue; + } + + mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count()); + + waitFor(sleepInterval); + + mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n", + duration_cast(steady_clock::now() - currentTime).count()); + + } while (threadRunning); + } catch (const std::exception &ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) 
{ + mvLog(MVLOG_ERROR, "unknown error"); } - void watchdog_routine() noexcept { - try { - mvLog(MVLOG_INFO, "thread started\n"); - - milliseconds sleepInterval; - - CustomUniqueLock lock {&routineLock}; - - do { - for (auto deviceIt = watchedDevices.begin(); deviceIt != watchedDevices.end(); ) { - auto &device = std::get<0>(*deviceIt); - auto isReady = device->dueIn(steady_clock::now()).count() == 0; - if (isReady) { - auto now = high_resolution_clock::now(); - device->keepAlive(steady_clock::now()); - mvLog(MVLOG_DEBUG, "ping completed in %ld ms\n", duration_cast(high_resolution_clock ::now()-now).count()); - } - if (device->isTimeout()) { - mvLog(MVLOG_ERROR, "[%p] device, not respond, removing from watchdog\n", device->getHandle()); - // marking device as deleted, to prevent double resource free from wd_unregister_device - *std::get<1>(*deviceIt) = true; - deviceIt = watchedDevices.erase(deviceIt); - } - else { - ++deviceIt; - } - } - auto currentTime = steady_clock::now(); - auto minInterval = std::min_element(watchedDevices.begin(), - watchedDevices.end(), - [¤tTime] (const Devices::value_type & device1, const Devices::value_type & device2) { - return std::get<0>(device1)->dueIn(currentTime).count() - < std::get<0>(device2)->dueIn(currentTime).count(); - }); - // if for some reason we have empty devices list but watchdog is active - if (minInterval == watchedDevices.end()) { - mvLog(MVLOG_INFO, "no active devices to watch, stopping Watchdog thread\n"); - threadRunning = false; - break; - } - // TODO: no timer coalescing feature, to minimized thread wakes - sleepInterval = std::get<0>(*minInterval)->dueIn(currentTime); - if (sleepInterval.count() <= 0) - continue; - - mvLog(MVLOG_DEBUG, "sleep interval = %ld ms\n", sleepInterval.count()); - wait_for(sleepInterval); - - mvLog(MVLOG_DEBUG, "waiting completed in %ld ms\n", - duration_cast(steady_clock::now() - currentTime).count()); - } while (threadRunning); - } catch (const std::exception & ex) { - mvLog(MVLOG_ERROR, "error %s", ex.what()); - } catch (...) { - mvLog(MVLOG_ERROR, "unknown error"); - } - - mvLog(MVLOG_INFO, "thread ended\n"); - } -}; + mvLog(MVLOG_INFO, "thread ended\n"); +} } // namespace -WD_API wd_error_t watchdog_init_context(wd_context *ctx) { - try { - mvLogLevelSet(MVLOG_ERROR); - mvLogDefaultLevelSet(MVLOG_ERROR); - if (!ctx) { - return WD_NOTINITIALIZED; - } - // opaque pointer initialized - if (ctx->opaque == WD_OPAQUE_MAGIC) { - mvLog(MVLOG_INFO, "watchdog context (%p) already initialized \n", ctx); - } else { - ctx->opaque = WD_OPAQUE_MAGIC; - } - return WD_ERRNO; - } catch (...) { - mvLog(MVLOG_ERROR, "failed initialize watchdog context: %p\n", ctx); +struct _WatchdogHndl_t { + WatchdogImpl* m_watchdog; +}; + +wd_error_t watchdog_create(WatchdogHndl_t** out_watchdogHndl) { + if (out_watchdogHndl == nullptr) { + return WD_NOTINITIALIZED; } + + *out_watchdogHndl = nullptr; + auto tmpWdHndl = + static_cast(malloc(sizeof(WatchdogHndl_t))); + if(tmpWdHndl == nullptr) { + return WD_FAIL; + } + + try { + tmpWdHndl->m_watchdog = new WatchdogImpl(); + *out_watchdogHndl = tmpWdHndl; + return WD_ERRNO; + } catch (const std::exception& ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) 
{ + mvLog(MVLOG_ERROR, "unknown error"); + } + + free(tmpWdHndl); return WD_FAIL; } -WD_API wd_error_t watchdog_register_device(wd_context * ctx, devicePrivate_t *device) { +void watchdog_destroy(WatchdogHndl_t* watchdogHndl) { + if (watchdogHndl == nullptr) { + return; + } + + if (watchdogHndl->m_watchdog != nullptr) { + delete(watchdogHndl->m_watchdog); + } + + free(watchdogHndl); +} + +wd_error_t watchdog_register_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle) { + if (watchdogHndl == nullptr) { + mvLog(MVLOG_ERROR, "watchdog handle is null\n"); + return WD_NOTINITIALIZED; + } + + if (deviceHandle == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device handle is null\n"); + return WD_NOTINITIALIZED; + } + + if (deviceHandle->m_device == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device not initialized. handle=%p\n", deviceHandle); + return WD_NOTINITIALIZED; + } + try { - if (!ctx) { - mvLog(MVLOG_ERROR, "watchdog context is null\n"); - return WD_NOTINITIALIZED; - } - // opaque pointer initialized - if (ctx->opaque == nullptr) { - mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx); - return WD_NOTINITIALIZED; - } - if (device && device->wd_interval <= 0) { - mvLog(MVLOG_ERROR, "watchdog interval should be > 0, but was (%d)\n", device->wd_interval); - return WD_NOTINITIALIZED; - } - // opaque pointer initialized - if (ctx->opaque != WD_OPAQUE_MAGIC) { - auto watchee = reinterpret_cast(ctx->opaque); - // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor - if (watchee->magic == WD_OPAQUE_MAGIC) { - // actually this can represent already registered context, so need to check - // since we are adding NoDue wrapper, lets check for it - if (nullptr != dynamic_cast(watchee->actual)) { - mvLog(MVLOG_ERROR, "watchdog context (%p) already registered within watchdog\n", ctx); - return WD_DUPLICATE; - } - - // transferring interval from context - if (device) { - watchee->actual->setInterval(milliseconds(device->wd_interval)); - } - ctx->opaque = WatchdogImpl::instance().register_device( - shared_ptr(new NoDueOnFirstCall(shared_ptr(watchee->actual, [](IDevice*){})))); - - if (ctx->opaque == nullptr) { - mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx); - } else { - return WD_ERRNO; - } - } - mvLog(MVLOG_ERROR, "watchdog context (%p) not initialized \n", ctx); - return WD_NOTINITIALIZED; - } - - if (device && device->wd_interval > 0) { - ctx->opaque = WatchdogImpl::instance().register_device(device); - } else { - ctx->opaque = nullptr; + WatchdogImpl* watchdog = watchdogHndl->m_watchdog; + auto device = reinterpret_cast(deviceHandle->m_device); + if (!watchdog->registerDevice(device)) { + mvLog(MVLOG_WARN, "cannot register device\n"); + return WD_FAIL; } return WD_ERRNO; } catch (const std::exception & ex) { mvLog(MVLOG_ERROR, "failed to register device: %s\n", ex.what()); } catch (...) 
{ - mvLog(MVLOG_ERROR, "failed to register device context (%p)\n", ctx); + mvLog(MVLOG_ERROR, "failed to register device (%p)\n", deviceHandle); } + return WD_FAIL; } -WD_API wd_error_t watchdog_unregister_device(wd_context *ctx) { +wd_error_t watchdog_unregister_device(WatchdogHndl_t* watchdogHndl, WdDeviceHndl_t* deviceHandle) { + if (watchdogHndl == nullptr) { + mvLog(MVLOG_ERROR, "watchdog handle is null\n"); + return WD_NOTINITIALIZED; + } + + if (deviceHandle == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device handle is null\n"); + return WD_NOTINITIALIZED; + } + + if (deviceHandle->m_device == nullptr) { + mvLog(MVLOG_ERROR, "watchdog device not initialized. handle=%p\n", deviceHandle); + return WD_NOTINITIALIZED; + } + try { - if (ctx == nullptr || ctx->opaque == nullptr) { - return WD_NOTINITIALIZED; - } else { - if (ctx->opaque != WD_OPAQUE_MAGIC) { - auto watchee = reinterpret_cast(ctx->opaque); - // NOTE: magic field used to pass preallocated watchee - since this function only used by plugin, this is not a backdoor - if (watchee->magic == WD_OPAQUE_MAGIC) { - if (!WatchdogImpl::instance().remove_device(ctx->opaque)) { - mvLog(MVLOG_WARN, "cannot remove device\n"); - return WD_FAIL; - } - } - } + WatchdogImpl* watchdog = watchdogHndl->m_watchdog; + auto device = reinterpret_cast(deviceHandle->m_device); + if (!watchdog->removeDevice(device)) { + mvLog(MVLOG_WARN, "cannot remove device\n"); + return WD_FAIL; } - - if (ctx != nullptr) { - // opaque pointer deleted - ctx->opaque = nullptr; - } - return WD_ERRNO; } catch (const std::exception & ex) { - mvLog(MVLOG_WARN, "error %s", ex.what()); + mvLog(MVLOG_ERROR, "error %s", ex.what()); } catch (...) { - mvLog(MVLOG_WARN, "unknown error"); + mvLog(MVLOG_ERROR, "unknown error"); } return WD_FAIL; diff --git a/inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp new file mode 100644 index 00000000000..56623257d9c --- /dev/null +++ b/inference-engine/thirdparty/movidius/mvnc/src/watchdog/xlink_device.cpp @@ -0,0 +1,173 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "xlink_device.h" +#include "watchdog.h" +#include "watchdogPrivate.hpp" + +#include "XLink.h" +#include "XLinkPrivateDefines.h" +#include "XLinkErrorUtils.h" + +#include + +#include + +namespace { + +using namespace std; +using namespace chrono; +using namespace Watchdog; + +class XLinkDevice : public IDevice { +public: + explicit XLinkDevice(devicePrivate_t* pDevice); + + void keepAlive(const time_point& current_time) noexcept override; + + milliseconds dueIn(const time_point& current_time) const noexcept override; + bool isTimeout() const noexcept override; + + /** + * @brief gets some opaque handle that clearly distinguish one device private_t from another + */ + void* getHandle() const noexcept override; + + ~XLinkDevice() = default; + +private: + bool sendPingMessage(); + +private: + const int kDeviceHangTimeout = 12000; + + _devicePrivate_t m_devicePrivate; + + time_point m_lastPongTime = time_point::min(); + time_point m_lastPingTime = time_point::min(); +}; + +//----------------- XLinkDevice implementation --------------------- + +XLinkDevice::XLinkDevice(devicePrivate_t* pDevice) + : m_devicePrivate(*pDevice) { + if (m_devicePrivate.wd_interval <= 0) { + throw runtime_error( + "watchdog interval should be > 0, but was " + std::to_string(m_devicePrivate.wd_interval)); + } + m_devicePrivate.wd_interval = 
std::max(m_devicePrivate.wd_interval, WATCHDOG_MAX_PING_INTERVAL_MS); +} + +void XLinkDevice::keepAlive(const time_point ¤t_time) noexcept { + bool bPong = sendPingMessage(); + // we consider that as first pong time even if it wasn't happen as beginning of boot + if (m_lastPongTime == time_point::min()) { + m_lastPongTime = current_time; + } + + m_lastPingTime = current_time; + + int diff = duration_cast(current_time - m_lastPongTime).count(); + + if (bPong) { + m_lastPongTime = current_time; + mvLog(MVLOG_INFO, "[%p] device, ping succeed after %d ms\n", m_devicePrivate.xlink, diff); + } else { + mvLog(MVLOG_WARN, "[%p] device, no response for %d ms\n", m_devicePrivate.xlink, diff); + } +} + +milliseconds XLinkDevice::dueIn(const time_point& current_time) const noexcept { + if (m_lastPingTime == time_point::min()) { + return milliseconds::zero(); + } + + // overdue + if (current_time - m_lastPingTime > std::chrono::milliseconds(m_devicePrivate.wd_interval)) { + return milliseconds::zero(); + } + + return duration_cast(m_lastPingTime + + std::chrono::milliseconds(m_devicePrivate.wd_interval) - current_time); +} + +bool XLinkDevice::isTimeout() const noexcept { + if (m_lastPongTime > m_lastPingTime) { + return false; + } + + if (m_lastPingTime - m_lastPongTime > milliseconds(kDeviceHangTimeout)) { + // cleaning xlink connection - allowing abort all semaphores waiting in other threads + XLinkResetAll(); + return true; + } + + return false; +} + +void* XLinkDevice::getHandle() const noexcept { + return m_devicePrivate.xlink; +} + +bool XLinkDevice::sendPingMessage() { + XLINK_RET_ERR_IF(pthread_mutex_lock(&m_devicePrivate.dev_stream_m), false); + + deviceCommand_t config = {}; + config.type = DEVICE_WATCHDOG_PING; + + // xlink ping acknowledge interval shouldn't be more then expected ping interval + XLinkError_t rc = XLinkWriteDataWithTimeout(m_devicePrivate.device_mon_stream_id, + (const uint8_t*)&config, sizeof(config), kDeviceHangTimeout); + + if(pthread_mutex_unlock(&m_devicePrivate.dev_stream_m) != 0) { + mvLog(MVLOG_ERROR, "Failed to unlock m_devicePrivate.dev_stream_m"); + } + + if (rc != X_LINK_SUCCESS) { + mvLog(MVLOG_ERROR, "Failed send ping message: %s", XLinkErrorToStr(rc)); + return false; + } + + return true; +} + +} // namespace + +wd_error_t xlink_device_create(WdDeviceHndl_t** out_deviceHandle, devicePrivate_t* pDevice) { + if (out_deviceHandle == nullptr || pDevice == nullptr) { + return WD_NOTINITIALIZED; + } + + *out_deviceHandle = nullptr; + auto tmpWdDeviceHndl = + static_cast(malloc(sizeof(WdDeviceHndl_t))); + if(tmpWdDeviceHndl == nullptr) { + return WD_FAIL; + } + + try { + tmpWdDeviceHndl->m_device = new XLinkDevice(pDevice); + *out_deviceHandle = tmpWdDeviceHndl; + return WD_ERRNO; + } catch (const std::exception& ex) { + mvLog(MVLOG_ERROR, "error %s", ex.what()); + } catch (...) 
{ + mvLog(MVLOG_ERROR, "unknown error"); + } + + free(tmpWdDeviceHndl); + return WD_FAIL; +} + +void xlink_device_destroy(WdDeviceHndl_t* deviceHandle) { + if (deviceHandle == nullptr) { + return; + } + + if (deviceHandle->m_device != nullptr) { + delete(reinterpret_cast(deviceHandle->m_device)); + } + + free(deviceHandle); +} diff --git a/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.cpp b/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.cpp index 8a710ac98f5..196d2acca79 100644 --- a/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.cpp +++ b/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.cpp @@ -20,10 +20,17 @@ void MvncTestsCommon::SetUp() { initialize_usb_boot(); ASSERT_NO_ERROR(setLogLevel(ncLogLevel)); availableDevices_ = getAmountOfDevices(); + + ASSERT_EQ(WD_ERRNO, watchdog_create(&m_watchdogHndl)); + + m_ncDeviceOpenParams.watchdogInterval = watchdogInterval; + m_ncDeviceOpenParams.customFirmwareDirectory = firmwarePath; + m_ncDeviceOpenParams.watchdogHndl = m_watchdogHndl; } void MvncTestsCommon::TearDown() { ncDeviceResetAll(); + watchdog_destroy(m_watchdogHndl); } int MvncTestsCommon::setLogLevel(const mvLog_t logLevel) { @@ -53,7 +60,7 @@ void MvncTestsCommon::openDevices(const int devicesToBoot, ncDeviceHandle_t **de ncDeviceDesc.platform = NC_ANY_PLATFORM; for (int index = 0; index < devicesToBoot; ++index) { - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandlers[index], ncDeviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandlers[index], ncDeviceDesc, m_ncDeviceOpenParams)); ASSERT_TRUE(deviceHandlers[index] != nullptr); ++amountOfBooted; } @@ -87,7 +94,7 @@ void MvncLoggingTests::SetUp() { _deviceDesc.platform = NC_ANY_PLATFORM; for (int index = 0; index < availableDevices_; ++index) { - ASSERT_NO_ERROR(ncDeviceOpen(&_deviceHandles[index], _deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&_deviceHandles[index], _deviceDesc, m_ncDeviceOpenParams)); } setbuf(stdout, buff); @@ -97,7 +104,7 @@ void MvncLoggingTests::SetUp() { void MvncLoggingTests::TearDown() { setbuf(stdout, NULL); for (int index = 0; index < availableDevices_; ++index) { - ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandles[index])); + ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandles[index], m_watchdogHndl)); } } @@ -116,7 +123,7 @@ void MvncGraphAllocations::SetUp() { void MvncGraphAllocations::TearDown() { for (int index = 0; index < _bootedDevices; ++index) { - ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[index])); + ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[index], m_watchdogHndl)); } _bootedDevices = 0; } diff --git a/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.h b/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.h index f2add94a82d..76960bec193 100644 --- a/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.h +++ b/inference-engine/thirdparty/movidius/mvnc/tests/cases/mvnc_common_test_cases.h @@ -33,6 +33,8 @@ public: mvLog_t ncLogLevel = MVLOG_INFO; int watchdogInterval = 1000; int availableDevices_ = 0; + WatchdogHndl_t* m_watchdogHndl = nullptr; + ncDeviceOpenParams_t m_ncDeviceOpenParams = {}; ~MvncTestsCommon() override = default; MvncTestsCommon(); diff --git a/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_stress_tests.cpp b/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_stress_tests.cpp index 1cdaf8b66e2..1827dece606 100644 --- 
a/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_stress_tests.cpp +++ b/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_stress_tests.cpp @@ -23,8 +23,8 @@ TEST_P(MvncStressTests, OpenClose1001) { for (int i = 0; i < iterations; ++i) { printf("Iteration %d of %d\n", i, iterations); - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); deviceHandle = nullptr; } } @@ -46,7 +46,7 @@ TEST_P(MvncStressTests, AllocateDeallocateGraph1001) { // Open device ncDeviceHandle_t *deviceHandle = nullptr; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); for (int i = 0; i < iterations; ++i) { printf("Iteration %d of %d\n", i, iterations); @@ -66,7 +66,7 @@ TEST_P(MvncStressTests, AllocateDeallocateGraph1001) { // Destroy graph ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle)); } - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); } @@ -87,7 +87,7 @@ TEST_P(MvncStressTests, FullCycleOfWork101Times) { for (int i = 0; i < iterations; i++) { ncDeviceHandle_t *deviceHandle = nullptr; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); ncGraphHandle_t* graphHandle = nullptr; std::string graphName = "graph"; @@ -146,7 +146,7 @@ TEST_P(MvncStressTests, FullCycleOfWork101Times) { ASSERT_NO_ERROR(ncGraphDestroy(&graphHandle)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); } } diff --git a/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_common.cpp b/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_common.cpp index 03e95c9845a..b41e13d89ab 100644 --- a/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_common.cpp +++ b/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_common.cpp @@ -93,7 +93,7 @@ TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) { deviceDesc.protocol = NC_USB; deviceDesc.platform = NC_ANY_PLATFORM; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, m_ncDeviceOpenParams)); actDeviceName = deviceHandle_USB->private_data->dev_addr; ASSERT_TRUE(actDeviceName.size()); @@ -101,15 +101,15 @@ TEST_F(MvncTestsCommon, OpenUSBThenPCIEAndClose) { // Open PCIe device deviceDesc.protocol = NC_PCIE; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, m_ncDeviceOpenParams)); actDeviceName = deviceHandle_PCIe->private_data->dev_addr; ASSERT_TRUE(actDeviceName.size()); ASSERT_TRUE(isMyriadPCIeDevice(actDeviceName)); // Close all - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe, m_watchdogHndl)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB, m_watchdogHndl)); } /** @@ -129,8 +129,7 @@ TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) { deviceDesc.platform = NC_ANY_PLATFORM; // Open PCIe device - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, - 
watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_PCIe, deviceDesc, m_ncDeviceOpenParams)); actDeviceName = deviceHandle_PCIe->private_data->dev_addr; ASSERT_TRUE(actDeviceName.size()); @@ -138,8 +137,7 @@ TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) { // Open USB device deviceDesc.protocol = NC_USB; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, - watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_USB, deviceDesc, m_ncDeviceOpenParams)); actDeviceName = deviceHandle_USB->private_data->dev_addr; ASSERT_TRUE(actDeviceName.size()); @@ -147,8 +145,8 @@ TEST_F(MvncTestsCommon, OpenPCIEThenUSBAndClose) { // Close all - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_PCIe, m_watchdogHndl)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_USB, m_watchdogHndl)); } //------------------------------------------------------------------------------ @@ -167,7 +165,7 @@ TEST_P(MvncOpenDevice, OpenAndClose) { deviceDesc.protocol = _deviceProtocol; deviceDesc.platform = NC_ANY_PLATFORM; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); ASSERT_TRUE(deviceHandle != nullptr); ASSERT_TRUE(deviceHandle->private_data != nullptr); @@ -178,7 +176,7 @@ TEST_P(MvncOpenDevice, OpenAndClose) { ASSERT_TRUE(isSameProtocolDevice(deviceName, _deviceProtocol)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); } /** @@ -193,8 +191,7 @@ TEST_P(MvncOpenDevice, AllHandleFieldsInitialized) { deviceDesc.protocol = _deviceProtocol; deviceDesc.platform = NC_ANY_PLATFORM; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, - watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); ASSERT_TRUE(deviceHandle != nullptr); @@ -204,7 +201,7 @@ TEST_P(MvncOpenDevice, AllHandleFieldsInitialized) { ASSERT_TRUE(device->dev_addr_booted != nullptr); ASSERT_TRUE(device->xlink != nullptr); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); } /** @@ -228,16 +225,16 @@ TEST_P(MvncOpenDevice, OpenTwiceSameHandler) { unsigned int data_lenght_second = MAX_DEV_NAME; // First open, get device name - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle, NC_RO_DEVICE_NAME, dev_addr_first_open, &data_lenght_first)); // Second open, get device name - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle, NC_RO_DEVICE_NAME, dev_addr_second_open, &data_lenght_second)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); // Should be the same device ASSERT_STREQ(dev_addr_first_open, dev_addr_second_open); } @@ -258,14 +255,12 @@ TEST_P(MvncOpenDevice, DISABLED_OpenSameDeviceTwiceDifferentHandlers) { deviceDesc.protocol = _deviceProtocol; deviceDesc.platform = NC_ANY_PLATFORM; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle1, deviceDesc, - watchdogInterval, firmwarePath)); + 
ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle1, deviceDesc, m_ncDeviceOpenParams)); // Till we don't have multiple device support, this function would try to open same device - ASSERT_ERROR(ncDeviceOpen(&deviceHandle2, deviceDesc, - watchdogInterval, firmwarePath)); + ASSERT_ERROR(ncDeviceOpen(&deviceHandle2, deviceDesc, m_ncDeviceOpenParams)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle1)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle1, m_watchdogHndl)); } @@ -284,22 +279,20 @@ TEST_P(MvncOpenDevice, OpenTwiceWithOneXLinkInitializion) { deviceDesc.protocol = _deviceProtocol; deviceDesc.platform = NC_ANY_PLATFORM; - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, - watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); actDeviceName = deviceHandle->private_data->dev_addr; ASSERT_TRUE(isSameProtocolDevice(actDeviceName, _deviceProtocol)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); // Second open - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, - watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); actDeviceName = deviceHandle->private_data->dev_addr; ASSERT_TRUE(isSameProtocolDevice(actDeviceName, _deviceProtocol)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); } //------------------------------------------------------------------------------ @@ -312,7 +305,7 @@ TEST_P(MvncLoggingTests, ShouldNotPrintErrorMessagesIfCanNotOpenDevice) { setLogLevel(MVLOG_INFO); ncDeviceHandle_t * deviceHandle = nullptr; - ASSERT_ERROR(ncDeviceOpen(&deviceHandle, _deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_ERROR(ncDeviceOpen(&deviceHandle, _deviceDesc, m_ncDeviceOpenParams)); std::string content(buff); for (int i = MVLOG_WARN; i < MVLOG_LAST; i++) { @@ -416,7 +409,7 @@ TEST_P(MvncGraphAllocations, DISABLED_AllocateGraphsOn2DevicesParallel) { */ TEST_F(MvncCloseDevice, EmptyDeviceHandler) { ncDeviceHandle_t *deviceHandle = nullptr; - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); } /** @@ -441,7 +434,7 @@ TEST_F(MvncCloseDevice, EmptyFieldsOfDeviceHandle) { deviceHandlePtr = dH.get(); } - ASSERT_EQ(ncDeviceClose(&deviceHandlePtr), NC_INVALID_PARAMETERS); + ASSERT_EQ(ncDeviceClose(&deviceHandlePtr, m_watchdogHndl), NC_INVALID_PARAMETERS); } //------------------------------------------------------------------------------ @@ -509,7 +502,7 @@ TEST_P(MvncInference, DISABLED_DoOneIterationOfInference) { ASSERT_NO_ERROR(ncGraphDestroy(&_graphHandle[0])); - ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[0])); + ASSERT_NO_ERROR(ncDeviceClose(&_deviceHandle[0], m_watchdogHndl)); } diff --git a/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_usb.cpp b/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_usb.cpp index a5cdea04cec..53a1db21e1a 100644 --- a/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_usb.cpp +++ b/inference-engine/thirdparty/movidius/mvnc/tests/mvnc_tests_usb.cpp @@ -27,13 +27,13 @@ TEST_F(MvncOpenUSBDevice, ShouldOpenDeviceAfterChangeConnectTimeoutFromZero) { deviceDesc.platform = NC_ANY_PLATFORM; ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(0)); - ASSERT_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); + ASSERT_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); 
std::this_thread::sleep_for(3_sec); ASSERT_NO_ERROR(ncDeviceResetAll()); ASSERT_NO_ERROR(ncSetDeviceConnectTimeout(30)); - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, watchdogInterval, firmwarePath)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle, deviceDesc, m_ncDeviceOpenParams)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle, m_watchdogHndl)); ASSERT_NO_ERROR(ncDeviceResetAll()); } @@ -44,8 +44,8 @@ TEST_F(MvncOpenUSBDevice, WithCustomFirmware) { GTEST_SKIP(); // Use custom firmware dir path as parameter for ncDeviceOpen - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl)); } /** @@ -58,10 +58,10 @@ TEST_F(MvncOpenUSBDevice, AllAvailableDevices) { ncDeviceHandle_t * deviceHandles[MAX_DEVICES] = {nullptr}; for (int index = 0; index < availableDevices_; ++index) { - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandles[index], deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandles[index], deviceDesc_, m_ncDeviceOpenParams)); } for (int index = 0; index < availableDevices_; ++index) { - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandles[index])); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandles[index], m_watchdogHndl)); } } @@ -78,7 +78,7 @@ TEST_F(MvncOpenUSBDevice, AllAvailableMultiThreads) { for (int i = 0; i < availableDevices_; ++i) { requests[i] = std::thread([i, &rc, &deviceHandle, this]() { - rc[i] = ncDeviceOpen(&deviceHandle[i], deviceDesc_, watchdogInterval, firmwarePath); + rc[i] = ncDeviceOpen(&deviceHandle[i], deviceDesc_, m_ncDeviceOpenParams); }); } @@ -88,7 +88,7 @@ TEST_F(MvncOpenUSBDevice, AllAvailableMultiThreads) { } for (int i = 0; i < availableDevices_; ++i) { - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle[i])); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle[i], m_watchdogHndl)); } } @@ -102,7 +102,8 @@ TEST_F(MvncOpenUSBDevice, WithInvalidFirmwarePath) { const char invalidPath[MAX_PATH] = "./InvalidPath/"; // Use custom firmware dir path as parameter for ncDeviceOpen - ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, invalidPath)); + m_ncDeviceOpenParams.customFirmwareDirectory = invalidPath; + ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); ASSERT_EQ(deviceHandle_, nullptr); } @@ -119,12 +120,12 @@ TEST_F(MvncOpenUSBDevice, OpenAvailableDeviceByName) { ASSERT_TRUE(availableDevices.size()); strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE); - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME, dev_addr_open, &data_lenght)); ASSERT_TRUE(strncmp(dev_addr_open, deviceDesc_.name, NC_MAX_NAME_SIZE) == 0); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl)); } TEST_F(MvncOpenUSBDevice, ErrorWhenWrongDeviceName) { @@ -138,7 +139,7 @@ TEST_F(MvncOpenUSBDevice, ErrorWhenWrongDeviceName) { auto availableDevices = getDevicesList(); ASSERT_TRUE(availableDevices.size()); - ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); 
} TEST_F(MvncOpenUSBDevice, OpenTwiceSameHandlerByName) { @@ -156,16 +157,16 @@ TEST_F(MvncOpenUSBDevice, OpenTwiceSameHandlerByName) { ASSERT_TRUE(availableDevices.size()); strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE); - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME, dev_addr_first_open, &data_lenght_first)); // Second open, get device name - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); ASSERT_NO_ERROR(ncDeviceGetOption(deviceHandle_, NC_RO_DEVICE_NAME, dev_addr_second_open, &data_lenght_second)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl)); // Should be the same device ASSERT_STREQ(dev_addr_first_open, dev_addr_second_open); } @@ -188,7 +189,7 @@ TEST_F(MvncOpenUSBDevice, CheckErrorWhenPlatformConflictWithName) { strncpy(deviceDesc_.name, availableDevices[0].c_str(), NC_MAX_NAME_SIZE); deviceDesc_.platform = wrongPlatform; - ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); } //------------------------------------------------------------------------------ @@ -200,7 +201,7 @@ TEST_F(MvncCloseUSBDevice, USBDeviceWillBeAvailableRightAfterClosing) { GTEST_SKIP(); ASSERT_NO_ERROR(ncDeviceOpen( - &deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + &deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); ASSERT_TRUE(deviceHandle_); @@ -210,7 +211,7 @@ TEST_F(MvncCloseUSBDevice, USBDeviceWillBeAvailableRightAfterClosing) { }; strcpy(deviceDesc_.name, deviceHandle_->private_data->dev_addr); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl)); deviceDesc_t foundDevice = {}; XLinkError_t rc = XLinkFindFirstSuitableDevice( @@ -229,7 +230,7 @@ TEST_P(MvncDevicePlatform, OpenAndClose) { if (available_myriad2_ == 0 || available_myriadX_ == 0) GTEST_SKIP(); - ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, watchdogInterval, firmwarePath)); + ASSERT_NO_ERROR(ncDeviceOpen(&deviceHandle_, deviceDesc_, m_ncDeviceOpenParams)); char deviceName[MAX_DEV_NAME]; unsigned int size = MAX_DEV_NAME; @@ -237,7 +238,7 @@ TEST_P(MvncDevicePlatform, OpenAndClose) { EXPECT_TRUE(isSamePlatformUSBDevice(deviceName, devicePlatform_)); - ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_)); + ASSERT_NO_ERROR(ncDeviceClose(&deviceHandle_, m_watchdogHndl)); } diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index d7684481d14..01318af1f8f 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -74,6 +74,7 @@ extensions/front/ATenToEmbeddingBag.py extensions/front/AttributedGatherNormalizer.py extensions/front/AttributedPadToPad.py extensions/front/binary_quantize_normalization.py +extensions/front/broadcast_with_range.py extensions/front/caffe/__init__.py extensions/front/caffe/accum_ext.py extensions/front/caffe/argmax_ext.py @@ -114,7 +115,6 @@ extensions/front/caffe/relu_ext.py extensions/front/caffe/reorgyolo_ext.py extensions/front/caffe/resample_ext.py extensions/front/caffe/reshape.py 
-extensions/front/caffe/ShuffleChannel.py extensions/front/caffe/shufflechannel_ext.py extensions/front/caffe/sigmoid.py extensions/front/caffe/simplernms_ext.py @@ -173,6 +173,8 @@ extensions/front/mxnet/clip_ext.py extensions/front/mxnet/conv_ext.py extensions/front/mxnet/copy_ext.py extensions/front/mxnet/crop_ext.py +extensions/front/mxnet/cumsum.py +extensions/front/mxnet/cumsum_ext.py extensions/front/mxnet/custom.py extensions/front/mxnet/custom_rpn_proposal.py extensions/front/mxnet/deformable_conv_ext.py @@ -245,6 +247,7 @@ extensions/front/onnx/constant_of_shape_ext.py extensions/front/onnx/constant_of_shape_to_broadcast.py extensions/front/onnx/conv_ext.py extensions/front/onnx/crop_ext.py +extensions/front/onnx/cumsum_ext.py extensions/front/onnx/deformable_conv_ext.py extensions/front/onnx/detection_output.py extensions/front/onnx/detectionoutput_ext.py @@ -286,6 +289,7 @@ extensions/front/onnx/priorgridgenerator_ext.py extensions/front/onnx/proposal_ext.py extensions/front/onnx/quantize_dequantize_linear.py extensions/front/onnx/quantize_ext.py +extensions/front/onnx/range_ext.py extensions/front/onnx/reduce_max_ext.py extensions/front/onnx/reduce_mean_ext.py extensions/front/onnx/reduce_min_ext.py @@ -350,6 +354,7 @@ extensions/front/tf/crop_and_resize_ext.py extensions/front/tf/CropAndResizeReplacement.py extensions/front/tf/CTCGreedyDecoder.py extensions/front/tf/CTCGreedyDecoder_ext.py +extensions/front/tf/cumsum_ext.py extensions/front/tf/deconv_ext.py extensions/front/tf/depth_to_space.py extensions/front/tf/elementwise_ext.py @@ -399,6 +404,7 @@ extensions/front/tf/placeholder_ext.py extensions/front/tf/placeholder_with_default_ext.py extensions/front/tf/pooling_ext.py extensions/front/tf/prelu.py +extensions/front/tf/range_ext.py extensions/front/tf/reduce_ext.py extensions/front/tf/reshape_related_ext.py extensions/front/tf/resize_bilinear.py @@ -448,6 +454,7 @@ extensions/front/tf/unique_ext.py extensions/front/tf/UnpackPackReverseInputChannels.py extensions/front/tf/variable_ext.py extensions/front/tf/variables_values_freezing.py +extensions/front/tf/WhereDecomposition.py extensions/front/tf/yolo_v1.json extensions/front/tf/yolo_v1_tiny.json extensions/front/tf/yolo_v2.json @@ -591,6 +598,7 @@ extensions/ops/constant_fill.py extensions/ops/copyop.py extensions/ops/correlation.py extensions/ops/ctc_greedy_decoder.py +extensions/ops/cumsum.py extensions/ops/data_augmentation.py extensions/ops/depth_to_space.py extensions/ops/DetectionOutput.py diff --git a/model-optimizer/extensions/front/broadcast_with_range.py b/model-optimizer/extensions/front/broadcast_with_range.py new file mode 100644 index 00000000000..0bb73b1e2e4 --- /dev/null +++ b/model-optimizer/extensions/front/broadcast_with_range.py @@ -0,0 +1,85 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import numpy as np + +from extensions.ops.gather import Gather +from extensions.ops.range import Range +from mo.front.common.partial_infer.utils import int64_array +from mo.front.common.replacement import FrontReplacementSubgraph +from mo.front.tf.graph_utils import create_op_with_const_inputs, create_op_node_with_second_input +from mo.graph.graph import Graph, rename_nodes, Node +from mo.ops.unsqueeze import Unsqueeze + + +class ExpandRangeConstant(FrontReplacementSubgraph): + """ + Searches for Constant operations filled with range values starting from 0 and replaces it with Range operation + Faced in ONNX BERT -- replacing it makes model reshape-able by sequence length + + WARNING: true BIDIRECTIONAL mode of Broadcast could cause issues + (the probability is small, so we decided to keep the optimization) + + value_input[1, X] (value=range(0,X)) shape_input[Y, 1] + \ / + Broadcast(mode='bidirectional') [Y, X] + """ + enabled = True + + def find_and_replace_pattern(self, graph: Graph): + for node in graph.get_op_nodes(type='Broadcast'): + value = node.in_port(0).get_source().node + if value.soft_get('type') == 'Const': + self.replace(node, value) + + @staticmethod + def replace(node: Node, const: Node): + graph = node.graph + shape = const.shape + const_name = const.soft_get('name', const.id) + + non_one_dims = np.argwhere(shape != 1).flatten() + one_dims = np.argwhere(shape == 1).flatten() + + if not (non_one_dims.size == 1 and 5 < np.prod(shape) < 500): + # (5;500) range is deduced to affect less models + return + + value = const.value + if not np.array_equal(np.arange(0, np.prod(shape), 1).reshape(shape), value): + return + + positive_idx = non_one_dims.item(0) + negative_idx = positive_idx - len(shape) + gather = create_op_with_const_inputs(graph, Gather, {1: int64_array(negative_idx), 2: int64_array(0)}, + {'name': node.soft_get('name', node.id) + '/BroadcastingDim'}) + + range_node = create_op_with_const_inputs(graph, Range, + {0: np.array(0, dtype=value.dtype), + 2: np.array(1, dtype=value.dtype)}, + {'name': const_name + '/Range', 'dtype': value.dtype}) + + node.in_port(1).get_connection().add_destination(gather.in_port(0)) + gather.out_port(0).connect(range_node.in_port(1)) + node.in_port(0).get_connection().set_source(range_node.out_port(0)) + + if one_dims.size: + unsqueeze = create_op_node_with_second_input(graph, Unsqueeze, one_dims, + {'name': const_name + '/KeepShape'}) + range_node.out_port(0).get_connection().insert_node(unsqueeze) + rename_nodes([(const, const_name + '/ToBeDeleted'), (unsqueeze, const_name)]) + else: + rename_nodes([(const, const_name + '/ToBeDeleted'), (range_node, const_name)]) diff --git a/model-optimizer/extensions/front/broadcast_with_range_test.py b/model-optimizer/extensions/front/broadcast_with_range_test.py new file mode 100644 index 00000000000..2c1ef540e86 --- /dev/null +++ b/model-optimizer/extensions/front/broadcast_with_range_test.py @@ -0,0 +1,75 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import unittest + +import numpy as np + +from extensions.front.broadcast_with_range import ExpandRangeConstant +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph, result, regular_op_with_shaped_data, valued_const_with_data, connect, \ + regular_op_with_empty_data, connect_data + + +class TestRangeBroadcast(unittest.TestCase): + def test_broadcast_with_range_positive_test(self): + graph = build_graph({ + **regular_op_with_shaped_data('shape', [2], {'type': 'Parameter'}), + **valued_const_with_data('value', np.arange(0, 384).reshape((1, 384))), + **regular_op_with_empty_data('bc', {'type': 'Broadcast'}), + **result(), + }, [ + *connect('value', '0:bc'), + *connect('shape', '1:bc'), + *connect('bc', 'output'), + ], nodes_with_edges_only=True) + ExpandRangeConstant().find_and_replace_pattern(graph) + + graph_ref = build_graph({ + **regular_op_with_shaped_data('shape', [2], {'type': 'Parameter'}), + + # start + **valued_const_with_data('start', np.array(0)), + # limit + **valued_const_with_data('minus_one', np.array(-1)), + **valued_const_with_data('zero', np.array(0)), + **regular_op_with_empty_data('range_dim', {'type': 'Gather'}), + # delta + **valued_const_with_data('delta', np.array(1)), + **regular_op_with_empty_data('range', {'type': 'Range'}), + + # keep dims + **valued_const_with_data('axes', np.array([0])), + **regular_op_with_empty_data('keep_shape', {'type': 'Unsqueeze'}), + + **regular_op_with_empty_data('bc', {'type': 'Broadcast'}), + **result(), + }, [ + *connect('start', '0:range'), + *connect('shape', '0:range_dim'), + *connect('minus_one', '1:range_dim'), + *connect('zero', '2:range_dim'), + *connect('range_dim', '1:range'), + *connect('delta', '2:range'), + *connect('range', '0:keep_shape'), + *connect('axes', '1:keep_shape'), + *connect('keep_shape', '0:bc'), + *connect_data('shape', '1:bc'), + *connect('bc', 'output'), + ], nodes_with_edges_only=True) + + (flag, resp) = compare_graphs(graph, graph_ref, 'output', check_op_attrs=True) + self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/caffe/ShuffleChannel.py b/model-optimizer/extensions/front/caffe/ShuffleChannel.py deleted file mode 100644 index 07c815b579a..00000000000 --- a/model-optimizer/extensions/front/caffe/ShuffleChannel.py +++ /dev/null @@ -1,90 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-""" - -import numpy as np - -from extensions.ops.Cast import Cast -from extensions.ops.elementwise import Div -from extensions.ops.transpose import Transpose -from mo.front.common.partial_infer.utils import int64_array -from mo.front.common.replacement import FrontReplacementPattern -from mo.front.tf.graph_utils import create_op_node_with_second_input -from mo.graph.graph import Graph, Node, rename_node -from mo.ops.const import Const -from mo.ops.reshape import Reshape -from mo.ops.shape import Shape -from mo.utils.shape import node_to_get_features_dimension_value, node_to_get_batch_value, \ - new_shape_node_from_shape_nodes - - -class ShuffleChannel(FrontReplacementPattern): - """ - Before: - ShuffleChannel(group) - - After: - Reshape[input_batch, group, input_channels/group, -1] - \/ - Transpose[0, 2, 1, 3] - \/ - Reshape[input_shape] - """ - enabled = True - graph_condition = [lambda graph: graph.graph['layout'] == 'NCHW'] - - @staticmethod - def decompose_shuffle_channel(node: Node): - graph = node.graph - name = node.soft_get('name', node.id) - - rename_node(node, name + '/to_be_removed') - - shape = Shape(graph, dict(name=name + '/InputShape')).create_node() - shape.in_port(0).connect(node.in_port(0).get_source()) - - # Reshape [input_batch, group, input_channels/group, -1] - batch = node_to_get_batch_value(shape) - group = Const(graph, dict(name=name + '/Rows', value=int64_array([node.group]))).create_node() - const = Const(graph, dict(name=name + '/Const', value=int64_array([-1]))).create_node() - - input_channels = node_to_get_features_dimension_value(shape) - output_channels = create_op_node_with_second_input( - graph, Div, np.int64(node.group), {'name': name + '/Cols'}, input_node=input_channels) - i_output_channels = Cast(graph, {'name': output_channels.name + '/Convert', 'dst_type': np.int64}).create_node() - output_channels.out_port(0).connect(i_output_channels.in_port(0)) - - reshape_split_dim = new_shape_node_from_shape_nodes([batch, group, i_output_channels, const]) - reshape_split_node = Reshape(graph, dict(name=name + '/Reshape_split_')).create_node() - reshape_split_dim.out_port(0).connect(reshape_split_node.in_port(1)) - - # Transpose(0, 2, 1, 3) - transpose_node = create_op_node_with_second_input( - graph, Transpose, int64_array([0, 2, 1, 3]), {'name': name + '/Transpose_'}, input_node=reshape_split_node) - - # Reshape back to input shape - reshape_concat = Reshape(graph, dict(name=name)).create_node() - rename_node(reshape_concat, name) - - shape.out_port(0).connect(reshape_concat.in_port(1)) - transpose_node.out_port(0).connect(reshape_concat.in_port(0)) - - # Final connections - node.in_port(0).get_connection().set_destination(reshape_split_node.in_port(0)) - node.out_port(0).get_connection().set_source(reshape_concat.out_port(0)) - - def find_and_replace_pattern(self, graph: Graph): - for shuffle_channel in graph.get_op_nodes(op='ShuffleChannel'): - self.decompose_shuffle_channel(shuffle_channel) diff --git a/model-optimizer/extensions/front/caffe/ShuffleChannel_test.py b/model-optimizer/extensions/front/caffe/ShuffleChannel_test.py deleted file mode 100644 index 06f52a68b1f..00000000000 --- a/model-optimizer/extensions/front/caffe/ShuffleChannel_test.py +++ /dev/null @@ -1,100 +0,0 @@ -""" - Copyright (C) 2018-2020 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" -import unittest - -from extensions.front.caffe.ShuffleChannel import ShuffleChannel -from mo.front.common.partial_infer.utils import int64_array -from mo.graph.graph import Node -from mo.utils.ir_engine.compare_graphs import compare_graphs -from mo.utils.unittest.graph import build_graph - -nodes_attributes = { - 'placeholder': {'kind': 'op', 'op': 'Parameter', 'shape': int64_array([1, 48, 28, 28])}, - 'shuffle_channel': {'kind': 'op', 'op': 'ShuffleChannel', 'group': int64_array(2), 'name': 'scname'}, - 'result': {'kind': 'op', 'op': 'Result'}, - - 'shape': {'op': 'ShapeOf', 'kind': 'op'}, - 'batch_gather': {'op': 'Gather', 'kind': 'op'}, - 'batch_gather_idx': {'value': int64_array([0]), 'kind': 'op', 'type': 'Const'}, - 'batch_gather_axis': {'value': int64_array(0), 'kind': 'op', 'type': 'Const'}, - - 'group': {'value': int64_array([2]), 'kind': 'op', 'type': 'Const'}, - - 'channel_gather': {'op': 'Gather', 'kind': 'op'}, - 'channel_gather_idx': {'value': int64_array([1]), 'kind': 'op', 'type': 'Const'}, - 'channel_gather_axis': {'value': int64_array(0), 'kind': 'op', 'type': 'Const'}, - - 'output_channels': {'op': 'Div', 'kind': 'op'}, - 'div_group': {'value': int64_array([2]), 'kind': 'op', 'type': 'Const'}, - 'convert': {'op': 'Cast', 'kind': 'op'}, - 'const': {'value': int64_array([-1]), 'kind': 'op', 'type': 'Const'}, - 'concat': {'op': 'Concat', 'kind': 'op'}, - 'reshape_split': {'op': 'Reshape', 'kind': 'op'}, - 'transpose': {'op': 'Transpose', 'kind': 'op'}, - 'transpose_const': {'value': int64_array([0, 2, 1, 3]), 'kind': 'op', 'type': 'Const'}, - 'reshape_concat': {'op': 'Reshape', 'kind': 'op'} -} - - -class ShuffleChannelTests(unittest.TestCase): - def test_1(self): - graph = build_graph(nodes_attributes, - [ - ('placeholder', 'shuffle_channel'), - ('shuffle_channel', 'result') - ], - nodes_with_edges_only=True) - graph.graph['layout'] = 'NCHW' - graph.stage = 'front' - - ref_graph = build_graph(nodes_attributes, - [ - ('placeholder', 'shape', {'in': 0, 'out': 0}), - - ('shape', 'batch_gather', {'in': 0, 'out': 0}), - ('batch_gather_idx', 'batch_gather', {'in': 1, 'out': 0}), - ('batch_gather_axis', 'batch_gather', {'in': 2, 'out': 0}), - - ('shape', 'channel_gather', {'in': 0, 'out': 0}), - ('channel_gather_idx', 'channel_gather', {'in': 1, 'out': 0}), - ('channel_gather_axis', 'channel_gather', {'in': 2, 'out': 0}), - - ('channel_gather', 'output_channels', {'in': 0, 'out': 0}), - ('div_group', 'output_channels', {'in': 1, 'out': 0}), - ('output_channels', 'convert', {'in': 0, 'out': 0}), - - ('batch_gather', 'concat', {'in': 0, 'out': 0}), - ('group', 'concat', {'in': 1, 'out': 0}), - ('convert', 'concat', {'in': 2, 'out': 0}), - ('const', 'concat', {'in': 3, 'out': 0}), - - ('placeholder', 'reshape_split', {'in': 0, 'out': 0}), - ('concat', 'reshape_split', {'in': 1, 'out': 0}), - - ('reshape_split', 'transpose', {'in': 0, 'out': 0}), - ('transpose_const', 'transpose', {'in': 1, 'out': 0}), - - ('transpose', 'reshape_concat', {'in': 0, 'out': 0}), - ('shape', 'reshape_concat', {'in': 1, 'out': 0}), - - ('reshape_concat', 'result') - ], - nodes_with_edges_only=True) - 
- ShuffleChannel().find_and_replace_pattern(graph) - (flag, resp) = compare_graphs(graph, ref_graph, 'result', check_op_attrs=True) - self.assertTrue(flag, resp) - self.assertTrue(Node(graph, 'result').in_port(0).get_source().node.name == 'scname') diff --git a/model-optimizer/extensions/front/caffe/shufflechannel_ext.py b/model-optimizer/extensions/front/caffe/shufflechannel_ext.py index e2acd71ae24..2c00c50fbaa 100644 --- a/model-optimizer/extensions/front/caffe/shufflechannel_ext.py +++ b/model-optimizer/extensions/front/caffe/shufflechannel_ext.py @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -from extensions.ops.shufflechannel import ShuffleChannelOp +from extensions.ops.shufflechannel import ShuffleChannels from mo.front.caffe.collect_attributes import collect_attributes from mo.front.common.extractors.utils import layout_attrs from mo.front.extractor import FrontExtractorOp @@ -29,5 +29,5 @@ class ShuffleChannelFrontExtractor(FrontExtractorOp): mapping_rule.update(layout_attrs()) # update the attributes of the node - ShuffleChannelOp.update_node_stat(node, mapping_rule) + ShuffleChannels.update_node_stat(node, mapping_rule) return cls.enabled diff --git a/model-optimizer/extensions/front/mxnet/cumsum.py b/model-optimizer/extensions/front/mxnet/cumsum.py new file mode 100644 index 00000000000..db98c65bc73 --- /dev/null +++ b/model-optimizer/extensions/front/mxnet/cumsum.py @@ -0,0 +1,47 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
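
[Editor's note] On the ShuffleChannel removal above: the deleted front transformation decomposed the layer into Reshape[N, group, C/group, -1] -> Transpose[0, 2, 1, 3] -> Reshape[input_shape], while the extractor now maps to the renamed ShuffleChannels operation and the front-phase decomposition is dropped. That decomposition is the ordinary channel-shuffle permutation, which can be checked with a small NumPy sketch (sizes are illustrative, not taken from the patch):

```python
import numpy as np

N, C, H, W, group = 1, 6, 2, 2, 2                       # illustrative sizes
x = np.arange(N * C * H * W).reshape(N, C, H, W)

# The removed decomposition: Reshape -> Transpose(0, 2, 1, 3) -> Reshape back
y = x.reshape(N, group, C // group, -1).transpose(0, 2, 1, 3).reshape(N, C, H, W)

# Channel shuffle expressed as an explicit channel permutation
perm = [(c % group) * (C // group) + c // group for c in range(C)]
assert np.array_equal(y, x[:, perm, :, :])
```
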
+""" +from extensions.ops.Cast import Cast +from extensions.ops.cumsum import CumSum +from mo.front.common.partial_infer.utils import int64_array +from mo.front.common.replacement import FrontReplacementOp +from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs, mxnet_str_dtype_to_np +from mo.front.tf.graph_utils import create_op_node_with_second_input +from mo.graph.graph import Graph, rename_node, Node +from mo.ops.const import Const + + +class CumSumFrontReplacer(FrontReplacementOp): + op = 'MXNetCumSum' + enabled = True + + def replace_op(self, graph: Graph, node: Node): + name = node.soft_get('name', node.id) + axis = node.soft_get('axis', 0) + + rename_node(node=node, name=name + '/to_be_removed') + cumsum_node = create_op_node_with_second_input(graph, CumSum, int64_array(axis), + {'name': name, 'reverse': False, 'exclusive': False}) + rename_node(cumsum_node, name) + + node.in_port(0).get_connection().set_destination(cumsum_node.in_port(0)) + if node.has_valid('mx_out_type') and node['mx_out_type'] is not None: + rename_node(node=cumsum_node, name=name + '/Clamp') + convert = Cast(graph, {'name': name, 'dst_type': node['mx_out_type']}).create_node() + rename_node(convert, name) + cumsum_node.out_port(0).connect(convert.in_port(0)) + return [convert.id] + else: + return [cumsum_node.id] diff --git a/model-optimizer/extensions/front/mxnet/cumsum_ext.py b/model-optimizer/extensions/front/mxnet/cumsum_ext.py new file mode 100644 index 00000000000..4ee3bcc35fb --- /dev/null +++ b/model-optimizer/extensions/front/mxnet/cumsum_ext.py @@ -0,0 +1,37 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +import numpy as np +from extensions.ops.cumsum import MXNetCumSum +from mo.front.extractor import FrontExtractorOp +from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs, mxnet_str_dtype_to_np + + +class CumSumExtractor(FrontExtractorOp): + op = '_np_cumsum' + enabled = True + + @classmethod + def extract(cls, node): + attrs = get_mxnet_layer_attrs(node.symbol_dict) + + update_attrs = { + 'axis': attrs.int('axis', 0), + 'mx_out_type': attrs.dtype('dtype', None) + } + + MXNetCumSum.update_node_stat(node, update_attrs) + return cls.enabled diff --git a/model-optimizer/extensions/front/onnx/cumsum_ext.py b/model-optimizer/extensions/front/onnx/cumsum_ext.py new file mode 100644 index 00000000000..d6b55f62156 --- /dev/null +++ b/model-optimizer/extensions/front/onnx/cumsum_ext.py @@ -0,0 +1,31 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +""" + +from extensions.ops.cumsum import CumSum +from mo.front.extractor import FrontExtractorOp +from mo.front.onnx.extractors.utils import onnx_attr + + +class CumSumFrontExtractor(FrontExtractorOp): + op = 'CumSum' + enabled = True + + @classmethod + def extract(cls, node): + exclusive = onnx_attr(node, 'exclusive', 'i', 0) + reverse = onnx_attr(node, 'reverse', 'i', 0) + CumSum.update_node_stat(node, {'exclusive': exclusive, 'reverse': reverse}) + return cls.enabled diff --git a/model-optimizer/extensions/front/onnx/range_ext.py b/model-optimizer/extensions/front/onnx/range_ext.py new file mode 100644 index 00000000000..a4cf3b32bc5 --- /dev/null +++ b/model-optimizer/extensions/front/onnx/range_ext.py @@ -0,0 +1,29 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +from extensions.ops.range import Range +from mo.front.extractor import FrontExtractorOp +from mo.graph.graph import Node + + +class RangeFrontExtractor(FrontExtractorOp): + op = 'Range' + enabled = True + + @classmethod + def extract(cls, node: Node): + Range.update_node_stat(node, {}) + return cls.enabled + diff --git a/model-optimizer/extensions/front/tf/WhereDecomposition.py b/model-optimizer/extensions/front/tf/WhereDecomposition.py new file mode 100644 index 00000000000..656c015b0cf --- /dev/null +++ b/model-optimizer/extensions/front/tf/WhereDecomposition.py @@ -0,0 +1,48 @@ +""" + Copyright (C) 2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import numpy as np + +from extensions.ops.non_zero import NonZero +from extensions.ops.transpose import Transpose +from mo.front.common.partial_infer.utils import int64_array +from mo.front.common.replacement import FrontReplacementOp +from mo.front.tf.graph_utils import create_op_node_with_second_input +from mo.graph.graph import Node, Graph, rename_nodes + + +class WhereDecomposition(FrontReplacementOp): + """ + This transformation decomposes the TF layer Where (when x = None, y = None) using the formula + Where(condition) = Transpose(NonZero(condition), [1, 0]) + """ + op = 'Where' + enabled = True + + def run_after(self): + from extensions.front.tf.sparse_weighted_sum import ExperimentalSparseWeightedSumFrontReplacer + from extensions.front.TransposeOrderNormalizer import TransposeOrderNormalizer + return [ExperimentalSparseWeightedSumFrontReplacer, TransposeOrderNormalizer] + + def replace_op(self, graph: Graph, node: Node): + node_name = node.soft_get('name', node.id) + non_zero_node = NonZero(graph, {'name': node_name + '/NonZero_', 'output_type': np.int64}).create_node() + transpose_node = create_op_node_with_second_input(graph, Transpose, int64_array([1, 0]), op_attrs={}) + non_zero_node.out_port(0).connect(transpose_node.in_port(0)) + rename_nodes([(node, node_name + '/delete'), (transpose_node, node_name)]) + + non_zero_node.in_port(0).connect(node.in_port(0).get_source()) + return [transpose_node.id] diff --git a/model-optimizer/extensions/front/tf/WhereDecomposition_test.py b/model-optimizer/extensions/front/tf/WhereDecomposition_test.py new file mode 100644 index 00000000000..432a619771a --- /dev/null +++ b/model-optimizer/extensions/front/tf/WhereDecomposition_test.py @@ -0,0 +1,98 @@ +""" + Copyright (C) 2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + + +import unittest + +import numpy as np + +from generator import generator, generate + +from extensions.front.tf.WhereDecomposition import WhereDecomposition +from mo.front.common.partial_infer.utils import int64_array +from mo.utils.ir_engine.compare_graphs import compare_graphs +from mo.utils.unittest.graph import build_graph + + +graph_node_attrs = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': None, + 'kind': 'data', + 'data_type': None + }, + 'tf_where': {'op': 'Where', 'kind': 'op'}, + 'tf_where_data': {'kind': 'data'}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + + +graph_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'tf_where'), + ('tf_where', 'tf_where_data'), + ('tf_where_data', 'output'), +] + + +ref_graph_node_attrs = { + 'placeholder': {'type': 'Parameter', 'kind': 'op', 'op': 'Parameter'}, + 'placeholder_data': { + 'value': None, + 'shape': None, + 'kind': 'data', + 'data_type': None + }, + 'non_zero': {'kind': 'op', 'op': 'NonZero', 'output_type': np.int64}, + 'non_zero_data': {'kind': 'data'}, + 'transpose': {'kind': 'op', 'op': 'Transpose'}, + 'transpose_data': {'kind': 'data'}, + 'perm_const': {'kind': 'op', 'op': 'Const', 'shape': [2], 'value': int64_array([1, 0])}, + 'perm_const_data': {'kind': 'data', 'shape': [2], 'value': int64_array([1, 0])}, + 'output': {'kind': 'op', 'op': 'Result'}, +} + +ref_graph_edges = [ + ('placeholder', 'placeholder_data'), + ('placeholder_data', 'non_zero'), + ('non_zero', 'non_zero_data'), + ('non_zero_data', 'transpose', {'in': 0}), + ('perm_const', 'perm_const_data'), + ('perm_const_data', 'transpose', {'in': 1}), + ('transpose', 'transpose_data'), + ('transpose_data', 'output'), +] + + +@generator +class TFWhereDecompositionTest(unittest.TestCase): + @generate(*[[1, 100, 120, 150], [16, 125, 14]]) + def test_1(self, input_shape): + in_shape = int64_array(input_shape) + graph = build_graph(graph_node_attrs, + graph_edges, + update_attributes={ + 'placeholder_data': {'shape': in_shape} + }) + WhereDecomposition().find_and_replace_pattern(graph) + ref_graph = build_graph(ref_graph_node_attrs, + ref_graph_edges, + update_attributes={ + 'placeholder_data': {'shape': in_shape} + }) + (flag, resp) = compare_graphs(graph, ref_graph, 'output') + self.assertTrue(flag, resp) diff --git a/model-optimizer/extensions/front/tf/cumsum_ext.py b/model-optimizer/extensions/front/tf/cumsum_ext.py new file mode 100644 index 00000000000..53409e9f27c --- /dev/null +++ b/model-optimizer/extensions/front/tf/cumsum_ext.py @@ -0,0 +1,30 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +from extensions.ops.cumsum import CumSum +from mo.front.extractor import FrontExtractorOp + + +class CumSumExtractor(FrontExtractorOp): + op = 'Cumsum' + enabled = True + + @classmethod + def extract(cls, node): + exclusive = node.pb.attr['exclusive'].b + reverse = node.pb.attr['reverse'].b + CumSum.update_node_stat(node, {'exclusive': exclusive, 'reverse': reverse}) + return cls.enabled diff --git a/model-optimizer/extensions/front/tf/range_ext.py b/model-optimizer/extensions/front/tf/range_ext.py new file mode 100644 index 00000000000..44d78556480 --- /dev/null +++ b/model-optimizer/extensions/front/tf/range_ext.py @@ -0,0 +1,30 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +from extensions.ops.range import Range +from mo.front.extractor import FrontExtractorOp +from mo.front.tf.extractors.utils import tf_dtype_extractor +from mo.graph.graph import Node + + +class RangeFrontExtractor(FrontExtractorOp): + op = 'Range' + enabled = True + + @classmethod + def extract(cls, node: Node): + Range.update_node_stat(node, {'dtype': tf_dtype_extractor(node.pb.attr['type'].type)}) + return cls.enabled + diff --git a/model-optimizer/extensions/middle/EltwiseChecker.py b/model-optimizer/extensions/middle/EltwiseChecker.py index 9acd563f700..b42941aa032 100644 --- a/model-optimizer/extensions/middle/EltwiseChecker.py +++ b/model-optimizer/extensions/middle/EltwiseChecker.py @@ -14,8 +14,6 @@ limitations under the License. """ -import logging as log - import numpy as np from mo.graph.graph import Node, Graph @@ -43,51 +41,55 @@ class EltwiseChecker(MiddleReplacementPattern): for flag in flags: node[flag] = False - def find_and_replace_pattern(self, graph: Graph): + def mark_eltwise_node(self, node, feature_channel=None): + tensor_port, value_port = get_tensor_in_port(node), get_value_in_port(node) + if tensor_port is None or value_port is None: + self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift']) + return + + connected_in_ports = {idx: port for idx, port in node.in_ports().items() if not port.disconnected()} + if len(connected_in_ports) != 2: + return + + tensor_shape = tensor_port.data.get_shape() + out_shape = node.out_port(0).data.get_shape() + assert tensor_shape is not None and out_shape is not None + if not np.array_equal(tensor_shape, out_shape): + # ScaleShift operation doesn't support broadcasting + self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift']) + return + + value_shape = value_port.data.get_shape() + assert value_shape is not None + assert len(value_shape) <= len(tensor_shape), \ + "No broadcasting was done for elementwise node {} due to previous checks in EltwiseChecker class. 
" \ + "But constant input rank is larger than tensor input rank, that is inconsistent".format(node.name) + + # if both tensors are 0D they cannot be converted to scaleshift + if len(tensor_shape) == 0 and len(value_shape) == 0: + self.set_flags_to_false(node, ['can_be_scaleshift']) + return + + broadcasted_value_shape = np.insert(value_shape, 0, [1] * (len(tensor_shape) - len(value_shape))) + + feature_dim = min(1, tensor_shape.size - 1) if node.graph.graph['layout'] == 'NCHW' else -1 + if feature_channel is not None: + feature_dim = feature_channel + ones = np.ones(len(tensor_shape)) + possible_shape = ones.copy() + np.put(possible_shape, feature_dim, tensor_shape.item(feature_dim)) + + if not np.array_equal(broadcasted_value_shape, ones) and \ + not np.array_equal(broadcasted_value_shape, possible_shape): + # ScaleShift weights should have [1,C,1,1]-like or [1,1,1,1]-like shape + self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift']) + return + + if len(tensor_shape) not in [2, 4, 5]: + # ScaleShift operation is supported for 2D, 4D or 5D tensor inputs + self.set_flags_to_false(node, ['can_be_scaleshift']) + return + + def find_and_replace_pattern(self, graph: Graph, feature_channel=None): for node in graph.get_op_nodes(is_eltwise=True): - log.debug('Checking eltwise op {}'.format(node.soft_get('name', node.id))) - tensor_port, value_port = get_tensor_in_port(node), get_value_in_port(node) - if tensor_port is None or value_port is None: - self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift']) - continue - - connected_in_ports = {idx: port for idx, port in node.in_ports().items() if not port.disconnected()} - if len(connected_in_ports) != 2: - continue - - tensor_shape = tensor_port.data.get_shape() - out_shape = node.out_port(0).data.get_shape() - assert tensor_shape is not None and out_shape is not None - if not np.array_equal(tensor_shape, out_shape): - # ScaleShift operation doesn't support broadcasting - self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift']) - continue - - value_shape = value_port.data.get_shape() - assert value_shape is not None - assert len(value_shape) <= len(tensor_shape), \ - "No broadcasting was done for elementwise node {} due to previous checks in EltwiseChecker class. 
" \ - "But constant input rank is larger than tensor input rank, that is inconsistent".format(node.name) - - # if both tensors are 0D they cannot be converted to scaleshift - if len(tensor_shape) == 0 and len(value_shape) == 0: - self.set_flags_to_false(node, ['can_be_scaleshift']) - continue - - broadcasted_value_shape = np.insert(value_shape, 0, [1] * (len(tensor_shape) - len(value_shape))) - - feature_dim = min(1, tensor_shape.size - 1) if node.graph.graph['layout'] == 'NCHW' else -1 - ones = np.ones(len(tensor_shape)) - possible_shape = ones.copy() - np.put(possible_shape, feature_dim, tensor_shape.item(feature_dim)) - - if not np.array_equal(broadcasted_value_shape, ones) and \ - not np.array_equal(broadcasted_value_shape, possible_shape): - # ScaleShift weights should have [1,C,1,1]-like or [1,1,1,1]-like shape - self.set_flags_to_false(node, ['can_be_fused', 'can_be_scaleshift']) - continue - - if len(tensor_shape) not in [2, 4, 5]: - # ScaleShift operation is supported for 2D, 4D or 5D tensor inputs - self.set_flags_to_false(node, ['can_be_scaleshift']) - continue + self.mark_eltwise_node(node) diff --git a/model-optimizer/extensions/middle/quantize_fuses.py b/model-optimizer/extensions/middle/quantize_fuses.py index 27db169753d..a392cc20f47 100644 --- a/model-optimizer/extensions/middle/quantize_fuses.py +++ b/model-optimizer/extensions/middle/quantize_fuses.py @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. """ +import numpy as np + from extensions.middle.BinarizeWeightsM1P1 import BinarizeWeightsM1P1 from extensions.middle.DeleteControlFlowEdges import DeleteControlFlowEdges from extensions.middle.EltwiseChecker import EltwiseChecker from mo.graph.graph import Graph +from mo.middle.passes.fusing.helpers import get_value_in_port from mo.middle.replacement import MiddleReplacementPattern @@ -35,9 +38,27 @@ class MarkNodesToFuseUpToFakeQuantize(MiddleReplacementPattern): def run_before(self): return [] + @staticmethod + def mark_fusable_muls_on_weights(graph): + for node in graph.get_op_nodes(op='Mul'): + children = node.out_port(0).get_destinations() + if len(children) > 1 or children[0].node.soft_get('type') not in ['Convolution', 'Deconvolution', 'MatMul']: + continue + value_in_port = get_value_in_port(node) + if value_in_port is None: + continue + value_shape = value_in_port.data.get_shape() + non_one_axis = np.argwhere(value_shape != 1) + if non_one_axis.size != 1: + continue + non_one_axis = non_one_axis.item(0) + node['can_be_fused'] = True + EltwiseChecker().mark_eltwise_node(node, non_one_axis) + def find_and_replace_pattern(self, graph: Graph): # to prevent fusing of non per channel lin ops, we run EltwiseChecker to mark nodes with can_be_fused attribute EltwiseChecker().find_and_replace_pattern(graph) + self.mark_fusable_muls_on_weights(graph) eltwise_nodes = graph.get_op_nodes(op='Mul', can_be_fused=True) + \ graph.get_op_nodes(op='Sub', can_be_fused=True) + \ graph.get_op_nodes(op='Add', can_be_fused=True) diff --git a/model-optimizer/extensions/ops/cumsum.py b/model-optimizer/extensions/ops/cumsum.py new file mode 100644 index 00000000000..6cc192ac90b --- /dev/null +++ b/model-optimizer/extensions/ops/cumsum.py @@ -0,0 +1,85 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +import numpy as np + +from mo.graph.graph import Node, Graph +from mo.ops.op import Op + + +def cumsum(a, axis=None, exclusive=False, reverse=False): + if reverse: + a = np.flip(a, axis) + res = np.cumsum(a, axis=axis) + if exclusive: + res -= a + if reverse: + res = np.flip(res, axis) + return res + + +class CumSum(Op): + enabled = False + op = 'CumSum' + version = 'opset3' + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { + 'op': self.op, + 'type': self.op, + 'version': self.version, + + 'infer': self.infer, + + 'in_ports_count': 2, + 'out_ports_count': 1, + }, attrs) + + def supported_attrs(self): + return ["exclusive", "reverse"] + + @staticmethod + def infer(node: Node): + node_name = node.soft_get('name', node.id) + + input_shape = node.in_port(0).data.get_shape() + assert input_shape is not None, 'Input shape is None for node "{}"'.format(node_name) + if not node.in_port(1).disconnected(): + assert len(node.in_port(1).data.get_shape()) == 0, 'Axis is not scalar for node: {}'.format(node_name) + + node.out_port(0).data.set_shape(input_shape.copy()) + + input_value = node.in_port(0).data.get_value() + if input_value is not None: + axis = None if node.in_port(1).disconnected() else node.in_port(1).data.get_value() + reverse = node.reverse if node.has_valid('reverse') else False + exclusive = node.exclusive if node.has_valid('exclusive') else False + node.out_port(0).data.set_value(cumsum(input_value, axis=axis, reverse=reverse, exclusive=exclusive)) + + +class MXNetCumSum(Op): + enabled = False + op = 'MXNetCumSum' + + def __init__(self, graph: Graph, attrs: dict): + super().__init__(graph, { + 'op': self.op, + 'type': None, + + 'infer': None, + + 'in_ports_count': 1, + 'out_ports_count': 1, + }, attrs) diff --git a/model-optimizer/extensions/ops/cumsum_test.py b/model-optimizer/extensions/ops/cumsum_test.py new file mode 100644 index 00000000000..e6756551a42 --- /dev/null +++ b/model-optimizer/extensions/ops/cumsum_test.py @@ -0,0 +1,133 @@ +""" + Copyright (C) 2018-2020 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
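For reference, the exclusive and reverse semantics of the cumsum helper above produce exactly the values asserted in the unit tests that follow; a short standalone check that mirrors the helper in plain numpy:

```python
import numpy as np

def cumsum(a, axis=None, exclusive=False, reverse=False):
    # Same logic as the CumSum helper above: flip, accumulate, optionally
    # subtract the input to get the exclusive (shifted) sum, then flip back.
    if reverse:
        a = np.flip(a, axis)
    res = np.cumsum(a, axis=axis)
    if exclusive:
        res -= a
    if reverse:
        res = np.flip(res, axis)
    return res

x = np.array([1., 2., 3., 4., 5.], dtype=np.float32)
print(cumsum(x))                                # [ 1.  3.  6. 10. 15.]
print(cumsum(x, exclusive=True))                # [ 0.  1.  3.  6. 10.]
print(cumsum(x, reverse=True))                  # [15. 14. 12.  9.  5.]
print(cumsum(x, exclusive=True, reverse=True))  # [14. 12.  9.  5.  0.]
```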
+""" + +import unittest + +import numpy as np + +from extensions.ops.cumsum import CumSum +from mo.front.common.partial_infer.utils import int64_array +from mo.graph.graph import Node +from mo.utils.unittest.graph import build_graph, valued_const_with_data, regular_op_with_shaped_data, result, connect + +nodes_attributes = { + **regular_op_with_shaped_data('data', [1, 3, 224, 224], {'type': 'Parameter', 'value': None, + '_out_port_data_type': {0: np.float32}}), + **valued_const_with_data('axis', int64_array(0)), + **regular_op_with_shaped_data('cumsum', None, {'op': 'CumSum', 'type': 'CumSum', 'name': 'cumsum'}), + **regular_op_with_shaped_data('identity', None, {'op': 'Identity', 'name': 'identity'}), + **result('output'), +} + + +class TestCumSum(unittest.TestCase): + def test_cumsum_axis(self): + graph = build_graph(nodes_attributes, + [*connect('data', '0:cumsum'), + *connect('axis', '1:cumsum'), + *connect('cumsum', '0:identity'), + ('identity', 'identity_d', {'out': 0}), + ('identity_d', 'output'), + ], + {'cumsum': {'reverse': False, 'exclusive': False} + }, nodes_with_edges_only=True) + + cumsum_node = Node(graph, 'cumsum') + CumSum.infer(cumsum_node) + self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_shape(), int64_array([1, 3, 224, 224]))) + + def test_cumsum_value_prop(self): + graph = build_graph(nodes_attributes, + [*connect('data', '0:cumsum'), + *connect('axis', '1:cumsum'), + ('cumsum', 'cumsum_d', {'out': 0}), + ('cumsum_d', 'output'), + ], + {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]}, + 'cumsum': {'reverse': False, 'exclusive': False} + }, nodes_with_edges_only=True) + + cumsum_node = Node(graph, 'cumsum') + CumSum.infer(cumsum_node) + self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(), + np.array([1., 3., 6., 10., 15.]).astype(np.float32))) + + def test_cumsum_value_prop_exclusive(self): + graph = build_graph(nodes_attributes, + [*connect('data', '0:cumsum'), + *connect('axis', '1:cumsum'), + ('cumsum', 'cumsum_d', {'out': 0}), + ('cumsum_d', 'output'), + ], + {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]}, + 'cumsum': {'reverse': False, 'exclusive': True} + }, nodes_with_edges_only=True) + + cumsum_node = Node(graph, 'cumsum') + CumSum.infer(cumsum_node) + self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(), + np.array([0., 1., 3., 6., 10.]).astype(np.float32))) + + def test_cumsum_value_prop_reverse(self): + graph = build_graph(nodes_attributes, + [*connect('data', '0:cumsum'), + *connect('axis', '1:cumsum'), + ('cumsum', 'cumsum_d', {'out': 0}), + ('cumsum_d', 'output'), + ], + {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]}, + 'cumsum': {'reverse': True, 'exclusive': False} + }, nodes_with_edges_only=True) + + cumsum_node = Node(graph, 'cumsum') + CumSum.infer(cumsum_node) + self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(), + np.array([15., 14., 12., 9., 5.]).astype(np.float32))) + + def test_cumsum_value_prop_exclusive_reverse(self): + graph = build_graph(nodes_attributes, + [*connect('data', '0:cumsum'), + *connect('axis', '1:cumsum'), + ('cumsum', 'cumsum_d', {'out': 0}), + ('cumsum_d', 'output'), + ], + {'data_d': {'value': np.array([1., 2., 3., 4., 5.]).astype(np.float32), 'shape': [5]}, + 'cumsum': {'reverse': True, 'exclusive': True} + }, nodes_with_edges_only=True) + + cumsum_node = Node(graph, 'cumsum') + CumSum.infer(cumsum_node) + 
self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(), + np.array([14., 12., 9., 5., 0.]).astype(np.float32))) + + def test_cumsum_value_prop_axis_1(self): + graph = build_graph(nodes_attributes, + [*connect('data', '0:cumsum'), + *connect('axis', '1:cumsum'), + ('cumsum', 'cumsum_d', {'out': 0}), + ('cumsum_d', 'output'), + ], + {'data_d': {'value': np.array([[1., 2., 3.], [4., 5., 6.]]).astype(np.float32), + 'shape': [2, 3]}, + 'axis_d': {'value': int64_array(1), + 'shape': []}, + 'cumsum': {'reverse': False, 'exclusive': False} + }, nodes_with_edges_only=True) + + cumsum_node = Node(graph, 'cumsum') + CumSum.infer(cumsum_node) + self.assertTrue(np.array_equal(cumsum_node.out_port(0).data.get_value(), + np.array([[1., 3., 6.], [4., 9., 15.]]).astype(np.float32))) diff --git a/model-optimizer/extensions/ops/range.py b/model-optimizer/extensions/ops/range.py index 4bb80c092ea..f806f1a26e2 100644 --- a/model-optimizer/extensions/ops/range.py +++ b/model-optimizer/extensions/ops/range.py @@ -23,52 +23,50 @@ from mo.ops.op import Op class Range(Op): + """ + Some notes on the automatic result data type infer. The tf.range does is differently than np.arange. Numpy + by default creates array with elements of type int64 and float64, but TF does not widen data types and + keep them int32 and float32. + Compare: + + >>> tf.range(1, 5, 0.5) + + >>> tf.range(1, 5, 2) + + + >>> np.array([0.5], dtype=np.float32) + array([0.5], dtype=float32) + >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([2], dtype=np.int32)).dtype + dtype('int64') + >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([0.5], dtype=np.float32)).dtype + dtype('float64') + """ op = 'Range' def __init__(self, graph: Graph, attrs: dict): mandatory_props = { - 'type': __class__.op, - 'op': __class__.op, + 'type': self.op, + 'op': self.op, + 'version': 'opset1', + 'infer': self.infer, + 'in_ports_count': 3, 'out_ports_count': 1, - 'infer': __class__.infer, } super().__init__(graph, mandatory_props, attrs) @staticmethod def infer(node: Node): - start = node.in_node(0) - limit = node.in_node(1) - delta = node.in_node(2) - output = node.out_node() + name = node.soft_get('name', node.id) + connected_input_ports = [in_port.idx for in_port in node.in_ports().values() if not in_port.disconnected()] + assert len(connected_input_ports) == 3 and [0, 1, 2] == sorted(connected_input_ports), \ + 'Range operation should have 3 inputs, {} found for {}'.format(len(connected_input_ports), name) - if not start.has_valid('value') or not limit.has_valid('value') or not delta.has_valid('value'): - log.error("Range operation is supported with constant inputs only") - return - if node.has_valid('pb') and 'type' in node.pb.attr: - from mo.front.tf.extractors.utils import tf_dtype_extractor - result_data_type = tf_dtype_extractor(node.pb.attr["type"].type) - elif node.has_valid('dtype'): - result_data_type = node.dtype - else: - result_data_type = start.value.dtype - output.value = np.arange(start.value, limit.value, delta.value, dtype=result_data_type) - output.shape = np.array(output.value.shape, dtype=np.int64) + start = node.in_port(0).data.get_value() + limit = node.in_port(1).data.get_value() + delta = node.in_port(2).data.get_value() - # Some notes on the automatic result data type infer. The tf.range does is differently than np.arange. 
Numpy - # by default creates array with elements of type int64 and float64, but TF does not widen data types and keep them - # int32 and float32. - # Compare: - - # >>> tf.range(1, 5, 0.5) - # - # >>> tf.range(1, 5, 2) - # - - # >>> np.array([0.5], dtype=np.float32) - # array([0.5], dtype=float32) - # >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([2], dtype=np.int32)).dtype - # dtype('int64') - # >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([0.5], dtype=np.float32)).dtype - # dtype('float64') + assert start is not None and limit is not None and delta is not None, \ + 'Range operation {} with dynamic inputs is not supported'.format(name) + node.out_port(0).data.set_value(np.arange(start, limit, delta, dtype=node.soft_get('dtype', start.dtype))) diff --git a/model-optimizer/extensions/ops/shufflechannel.py b/model-optimizer/extensions/ops/shufflechannel.py index ff65280f7db..4faf5c71849 100644 --- a/model-optimizer/extensions/ops/shufflechannel.py +++ b/model-optimizer/extensions/ops/shufflechannel.py @@ -13,23 +13,34 @@ See the License for the specific language governing permissions and limitations under the License. """ - -from mo.graph.graph import Graph +from mo.graph.graph import Graph, Node from mo.ops.op import Op -class ShuffleChannelOp(Op): - op = 'ShuffleChannel' +class ShuffleChannels(Op): + op = 'ShuffleChannels' enabled = False def __init__(self, graph: Graph, attrs: dict): super().__init__(graph, { 'op': self.op, - 'type': None, + 'type': self.op, + 'version': 'opset3', - # operation should be resolved on the front phase, partial inference is not needed - 'infer': None, + 'infer': self.infer, + + 'axis': 1, + 'group': None, 'in_ports_count': 1, 'out_ports_count': 1, }, attrs) + + def backend_attrs(self): + return ['group', 'axis'] + + @staticmethod + def infer(node: Node): + node_name = node.soft_get('name', node.id) + assert node.soft_get('group') is not None, 'The attribute "group" must be set for node {}'.format(node_name) + node.out_port(0).data.set_shape(node.in_port(0).data.get_shape()) diff --git a/model-optimizer/mo/front/mxnet/extractors/utils.py b/model-optimizer/mo/front/mxnet/extractors/utils.py index 7914ed674bd..0be96531c95 100644 --- a/model-optimizer/mo/front/mxnet/extractors/utils.py +++ b/model-optimizer/mo/front/mxnet/extractors/utils.py @@ -15,6 +15,7 @@ """ import mxnet as mx +import numpy as np from extensions.ops.elementwise import Elementwise from mo.graph.graph import Node, Graph @@ -52,6 +53,11 @@ class AttrDictionary(object): return self._dict[key] return default + def dtype(self, key, default=None): + if self.is_valid and key in self._dict: + return mxnet_str_dtype_to_np(self._dict[key]) + return default + def bool(self, key, default=None): attr = self.str(key, default) if isinstance(attr, str): @@ -143,7 +149,7 @@ def get_json_layer_attrs(json_dic): return json_dic[attr] -def load_params(input_model, data_names = ('data',)): +def load_params(input_model, data_names=('data',)): arg_params = {} aux_params = {} arg_keys = [] @@ -153,10 +159,10 @@ def load_params(input_model, data_names = ('data',)): if file_format == 'params': for key in loaded_weight: keys = key.split(':') - if len(keys)>1 and 'aux' == keys[0]: + if len(keys) > 1 and 'aux' == keys[0]: aux_keys.append(keys[1]) aux_params[keys[1]] = loaded_weight[key] - elif len(keys)>1 and 'arg' == keys[0]: + elif len(keys) > 1 and 'arg' == keys[0]: arg_keys.append(keys[1]) arg_params[keys[1]] = loaded_weight[key] else: @@ 
-205,3 +211,17 @@ def scalar_ops_replacer(graph: Graph, node: Node, elementwise_op_type=Elementwis lin_node.in_port(1).get_connection().set_source(scalar_value.out_port(0)) node.out_port(0).get_connection().set_source(lin_node.out_port(0)) return lin_node + + +MXNET_DATA_TYPES = { + 'float16': np.float16, + 'float32': np.float32, + 'float64': np.float64, + 'int8': np.int8, + 'int32': np.int32, + 'int64': np.int64, +} + + +def mxnet_str_dtype_to_np(dtype: str): + return MXNET_DATA_TYPES[dtype] diff --git a/model-optimizer/mo/ops/slice.py b/model-optimizer/mo/ops/slice.py index ae34af607db..f37c3cb7940 100644 --- a/model-optimizer/mo/ops/slice.py +++ b/model-optimizer/mo/ops/slice.py @@ -40,6 +40,7 @@ class Slice(Op): @staticmethod def infer(node: Node): + input_shape = node.in_port(0).data.get_shape() axis = None steps = None if len(node.in_nodes()) == 1: @@ -90,6 +91,11 @@ class Slice(Op): end = start + size axis = None + # Check for situation when size[i] == -1 in TF + for i in range(start.size): + if end[i] < start[i]: + end[i] = input_shape[i] + # Delete edges to start, size nodes node.graph.remove_edge(node.in_node(1).id, node.id) node.graph.remove_edge(node.in_node(2).id, node.id) @@ -104,16 +110,11 @@ class Slice(Op): log.warning('Incorrect number of input nodes in slice operation') return - input_shape = node.in_node(0).shape - # Check for situation when size[i] == -1 in TF - for i in range(start.size): - if end[i] < start[i]: - end[i] = input_shape[i] # Update end param node.end = end value = node.in_node(0).value - # If value is None create dummy vaue for shape propogation + # If value is None create dummy value for shape propagation if value is None: value = np.zeros(input_shape) @@ -131,7 +132,6 @@ class Slice(Op): # Ranged for output value for specified axis slice_idx[axis[id]] = slice(start[id], end[id], steps[id]) - # TODO: check whether this check is really important for axis, s in enumerate(slice_idx): if s is None: slice_idx[axis] = slice(0, input_shape[axis], 1) diff --git a/model-optimizer/mo/ops/slice_test.py b/model-optimizer/mo/ops/slice_test.py index 500b7cb3bec..dac0c4fdb60 100644 --- a/model-optimizer/mo/ops/slice_test.py +++ b/model-optimizer/mo/ops/slice_test.py @@ -38,6 +38,16 @@ nodes_attributes = { 'shape': None, 'value': None, }, + 'starts': { + 'kind': 'data', + 'shape': None, + 'value': None, + }, + 'ends': { + 'kind': 'data', + 'shape': None, + 'value': None, + }, 'slice': { 'op': 'Slice', 'axis': None, @@ -96,7 +106,7 @@ class TestSliceOp(unittest.TestCase): self.assertTrue(np.array_equal(slice_node['slices'], np.array([slice(1, 4, 1), slice(2, 3, 1), slice(0, 6, 1)]))) def test_slice_infer_multiply_params(self): - # Test case when size[i] == -1 (that means all + # Test case for TF when size[i] == -1 (that means all # remaining elements in dimension i are included in the slice) graph = build_graph(nodes_attributes, [('data_1', 'slice'), @@ -115,3 +125,25 @@ class TestSliceOp(unittest.TestCase): self.assertTrue(np.array_equal(slice_node.out_node().value, None)) self.assertTrue(np.array_equal(slice_node.out_node().shape, np.array([3, 3, 6]))) self.assertTrue(np.array_equal(slice_node['slices'], np.array([slice(1, 4, 1), slice(2, 5, 1), slice(0, 6, 1)]))) + + def test_slice_onnx_10_opset_case(self): + # check for negative end value in the case of ONNX 10 opset + input = np.array([[4, 5, 6, 7], [2, 3, 5, 6], [5, 6, 8, 9], [5, 6, 8, 9]]) + starts = np.array([0, 1]) + ends = np.array([3, -2]) + expected_values = np.array([[5], [3], [6]]) + + graph = 
build_graph(nodes_attributes, + [('data_1', 'slice'), + ('starts', 'slice'), + ('ends', 'slice'), + ('slice', 'data_2')], + {'data_1': {'value': input, 'shape': input.shape}, + 'starts': {'value': starts, 'shape': starts.shape}, + 'ends': {'value': ends, 'shape': ends.shape}, + 'slice': {'format': 'onnx'}}) + + slice_node = Node(graph, 'slice') + + Slice.infer(slice_node) + self.assertTrue(np.array_equal(slice_node.out_node().value, expected_values)) diff --git a/ngraph/ABOUT.md b/ngraph/ABOUT.md index e116abb4263..37c7e06db90 100644 --- a/ngraph/ABOUT.md +++ b/ngraph/ABOUT.md @@ -1,18 +1,6 @@ About nGraph Compiler stack =========================== -nGraph Compiler stack architecture ----------------------------------- - -The diagram below represents our current release stack. In the diagram, -nGraph components are colored in gray. Please note -that the stack diagram is simplified to show how nGraph executes deep -learning workloads with two hardware backends; however, many other -deep learning frameworks and backends currently are functioning. - -![](doc/sphinx/source/graphics/ngraph_arch_diag.png) - - ## Bridge Starting from the top of the stack, nGraph receives a computational graph @@ -44,21 +32,6 @@ ResNet for TensorFlow, the same optimization can be readily applied to MXNet* or ONNX* implementations of ResNet. -## Hybrid Transformer - -Hybrid transformer takes the nGraph IR, and partitions it into -subgraphs, which can then be assigned to the best-performing backend. -There are two hardware backends shown in the stack diagram to demonstrate -this graph partitioning. The Hybrid transformer assigns complex operations -(subgraphs) to Intel® Nervanaâ„¢ Neural Network Processor (NNP) to expedite the -computation, and the remaining operations default to CPU. In the future, -we will further expand the capabilities of Hybrid transformer -by enabling more features, such as localized cost modeling and memory -sharing. - -Once the subgraphs are assigned, the corresponding backend will -execute the IR. - Features -------- @@ -71,24 +44,3 @@ non-device-specific optimizations: available device. - **Data reuse** -- Save results and reuse for subgraphs with the same input. -- **Graph scheduling** -- Run similar subgraphs in parallel via - multi-threading. -- **Graph partitioning** -- Partition subgraphs to run on different - devices to speed up computation; make better use of spare CPU cycles - with nGraph. -- **Memory management** -- Prevent peak memory usage by intercepting - a graph with or by a "saved checkpoint," and to enable data auditing. - -Limitations ------------ - -The Beta release of nGraph only supports Just-In-Time (JiT) compilation; -Ahead-of Time (AoT) compilation will be supported in the official release. -nGraph currently has limited support for dynamic shapes. 
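Side note on the Slice.infer change earlier in this diff: hoisting input_shape lets the TF-style branch resolve size[i] == -1 (encoded as end[i] < start[i]) against the input shape, while the new ONNX opset-10 test relies on numpy's native handling of negative end indices. A small standalone illustration of both conventions, using plain numpy rather than the Model Optimizer Slice op:

```python
import numpy as np

data = np.array([[4, 5, 6, 7],
                 [2, 3, 5, 6],
                 [5, 6, 8, 9],
                 [5, 6, 8, 9]])

# TF-style slice: size == -1 means "to the end of the dimension".
start = np.array([0, 1])
size = np.array([3, -1])
end = start + size                                           # [3, 0] -> end[1] < start[1]
end = np.where(end < start, np.array(data.shape[:2]), end)   # resolve -1 against the input shape
print(data[start[0]:end[0], start[1]:end[1]])                # rows 0..2, cols 1..3

# ONNX-10-style slice: negative ends count from the end of the axis,
# which plain Python/numpy slicing already handles.
starts = np.array([0, 1])
ends = np.array([3, -2])
print(data[starts[0]:ends[0], starts[1]:ends[1]])            # [[5], [3], [6]]
```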
- - -Current nGraph Compiler full stack ----------------------------------- - -![](doc/sphinx/source/graphics/ngraph_full_stack_diagrams.png) - diff --git a/ngraph/CMakeLists.txt b/ngraph/CMakeLists.txt index 40781879c17..c55dc2ad412 100644 --- a/ngraph/CMakeLists.txt +++ b/ngraph/CMakeLists.txt @@ -112,7 +112,6 @@ endif() option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE) option(NGRAPH_TEST_UTIL_ENABLE "Control the building of test utility" TRUE) -option(NGRAPH_DOC_BUILD_ENABLE "Control the building of documentation" FALSE) option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE) option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" FALSE) option(NGRAPH_DEPRECATED_ENABLE "Enable compiler deprecation pragmas for deprecated APIs (recommended only for development use)" FALSE) @@ -150,7 +149,6 @@ endmacro() NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE) NORMALIZE_BOOL(NGRAPH_TEST_UTIL_ENABLE) -NORMALIZE_BOOL(NGRAPH_DOC_BUILD_ENABLE) NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE) NORMALIZE_BOOL(NGRAPH_DEBUG_ENABLE) NORMALIZE_BOOL(NGRAPH_DEPRECATED_ENABLE) @@ -172,7 +170,6 @@ message(STATUS "NGRAPH_ADDRESS_SANITIZER_ENABLE: ${NGRAPH_ADDRESS_SANITIZER message(STATUS "NGRAPH_CODE_COVERAGE_ENABLE: ${NGRAPH_CODE_COVERAGE_ENABLE}") message(STATUS "NGRAPH_DEBUG_ENABLE: ${NGRAPH_DEBUG_ENABLE}") message(STATUS "NGRAPH_DEPRECATED_ENABLE: ${NGRAPH_DEPRECATED_ENABLE}") -message(STATUS "NGRAPH_DOC_BUILD_ENABLE: ${NGRAPH_DOC_BUILD_ENABLE}") message(STATUS "NGRAPH_DYNAMIC_COMPONENTS_ENABLE: ${NGRAPH_DYNAMIC_COMPONENTS_ENABLE}") message(STATUS "NGRAPH_EXPORT_TARGETS_ENABLE: ${NGRAPH_EXPORT_TARGETS_ENABLE}") message(STATUS "NGRAPH_IE_ENABLE: ${NGRAPH_IE_ENABLE}") @@ -211,6 +208,13 @@ set(NGRAPH_FORWARD_CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${CMAKE_POSITION_INDEPENDENT_CODE} ) +if(CMAKE_TOOLCHAIN_FILE) + set(NGRAPH_FORWARD_CMAKE_ARGS + ${NGRAPH_FORWARD_CMAKE_ARGS} + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} + ) +endif() + if (CMAKE_OSX_SYSROOT) set(NGRAPH_FORWARD_CMAKE_ARGS ${NGRAPH_FORWARD_CMAKE_ARGS} @@ -496,10 +500,6 @@ endif() add_subdirectory(test) -if (NGRAPH_DOC_BUILD_ENABLE) - add_subdirectory(doc) -endif() - if (NGRAPH_PYTHON_BUILD_ENABLE) add_subdirectory(python) endif() diff --git a/ngraph/CONTRIB.md b/ngraph/CONTRIB.md deleted file mode 100644 index b8b7f07b04b..00000000000 --- a/ngraph/CONTRIB.md +++ /dev/null @@ -1,241 +0,0 @@ -Contributor Guidelines -====================== - -The latest version of this file can be found at: - -https://www.ngraph.ai/documentation/contributing/guide - -License -------- - -All contributed code must be compatible with the [Apache -2](https://www.apache.org/licenses/LICENSE-2.0) license, preferably by -being contributed under the Apache 2 license. Code contributed with -another license will need the license reviewed by Intel before it can be -accepted. - -Code formatting ---------------- - -All C/C++ source code in the repository, including the test code, must -adhere to the source-code formatting and style guidelines described -here. The coding style described here applies to the nGraph repository. -Related repositories may make adjustements to better match the coding -styles of libraries they are using. - -### Adding ops to nGraph Core - -Our design philosophy is that the graph is not a script for running -optimized kernels; rather, the graph is a specification for a -computation composed of basic building blocks which we call `ops`. 
-Compilation should match groups of `ops` to appropriate optimal and -semantically-equivalent groups of kernels for the backend(s) in use. -Thus, we expect that adding of new Core ops should be infrequent and -that most functionality instead gets added with new functions that build -sub-graphs from existing core ops. - -### Coding style - -We have a coding standard to help us to get development done. If part of -the standard is impeding progress, we either adjust that part or remove -it. To this end, we employ coding standards that facilitate -understanding of *what nGraph components are doing*. Programs are -easiest to understand when they can be understood locally; if most local -changes have local impact, you do not need to dig through multiple files -to understand what something does and if it is safe to modify. - -#### Names - -Names should *briefly* describe the thing being named and follow these -casing standards: - -- Define C++ class or type names with `CamelCase`. -- Assign template parameters with `UPPER_SNAKE_CASE`. -- Case variable and function names with `lower_snake_case`. - -Method names for basic accessors are prefixed by `get_`, `is_`, or -`set_` and should have simple $\mathcal{O}(1)$ implementations: - -- A `get_` method should be externally idempotent. It may perform some - simple initialization and cache the result for later use. Trivial - `get_` methods can be defined in a header file. If a method is - non-trivial, that is often a sign that it is not a basic accessor. -- An `is_` may be used instead of `get_` for boolean accessors. -- A `set_` method should change the value returned by the - corresponding `get_` method. - - Use `set_is_` if using `is_` to get a value. - - Trivial `set_` methods may be defined in a header file. -- Names of variables should indicate the use of the variable. - - Member variables should be prefixed with `m_`. - - Static member variables should be rare and be prefixed with - `s_`. -- Do not use `using` to define a type alias at top-level in header - file. If the abstraction is useful, give it a class. - - C++ does not enforce the abstraction. For example if `X` and `Y` - are aliases for the same type, you can pass an `X` to something - expecting a `Y`. - - If one of the aliases were later changed, or turned into a real - type, many callers could require changes. - -#### Namespaces - -- `ngraph` is for the public API, although this is not - currently enforced. - - Use a nested namespace for implementation classes. - - Use an unnamed namespace or `static` for file-local names. This - helps prevent unintended name collisions during linking and when - using shared and dynamically-loaded libraries. - - Never use `using` at top-level in a header file. - - - Doing so leaks the alias into users of the header, including - headers that follow. - - - It is okay to use `using` with local scope, such as inside a class - definiton. - - - Be careful of C++'s implicit namespace inclusions. For example, - if a parameter's type is from another namespace, that namespace - can be visible in the body. - - Only use `using std` and/or `using ngraph` in `.cpp` files. - `using` a nested namespace has can result in - unexpected behavior. - -#### File Names - -- Do not use the same file name in multiple directories. At least one - IDE/debugger ignores the directory name when setting breakpoints. -- Use `.hpp` for headers and `.cpp` for implementation. -- Reflect the namespace nesting in the directory hierarchy. -- Unit test files are in the `tests` directory. 
- - Transformer-dependent tests are tests running on the default - transformer or specifying a transformer. For these, use the form - - ``` - TEST(file_name, test_name) - ``` - - - Transformer-independent tests: - - File name is `file_name.in.cpp` - - Add `#include "test_control.hpp"` to the file's includes - - Add the line - `static std::string s_manifest = "${MANIFEST}";` to the top - of the file. - - Use - - ``` - NGRAPH_TEST(${BACKEND_NAME}, test_name) - ``` - - for each test. Files are generated for each transformer and - the `${BACKEND_NAME}` is replaced with the transformer name. - - Individual unit tests may be disabled by adding the name of - the test to the `unit_test.manifest` file found in the - transformer's source file directory. - -#### Formatting - -Things that look different should look different because they are -different. We use **clang format** to enforce certain formatting. -Although not always ideal, it is automatically enforced and reduces -merge conflicts. - -- The .clang-format file located in the root of the project specifies - our format. - - The script maint/apply-code-format.sh enforces that formatting - at the C/C++ syntactic level. - - The script at maint/check-code-format.sh verifies that the - formatting rules are met by all C/C++ code (again, at the - syntax level). The script has an exit code of `0` when code - meets the standard and non-zero otherwise. This script does - *not* modify the source code. -- Formatting with `#include` files: - - Put headers in groups separated by a blank line. Logically order - the groups downward from system-level to 3rd-party to `ngraph`. - - Formatting will keep the files in each group in - alphabetic order. - - Use this syntax for files that **do not change during nGraph - development**; they will not be checked for changes - during builds. Normally this will be everything but the ngraph - files: - - ``` - #include - ``` - - - Use this syntax for files that **are changing during nGraph - development**; they will be checked for changes during builds. - Normally this will be ngraph headers: - - ``` - #include "file" - ``` - - - Use this syntax for system C headers with C++ wrappers: - - ``` - #include - ``` - -- To guard against multiple inclusion, use: - - ``` - #pragma once - ``` - - - The syntax is a compiler extension that has been adopted by all - supported compilers. -- The initialization - - ``` - Foo x{4, 5}; - ``` - - is preferred over - - ``` - Foo x(4, 5); - ``` - -- Indentation should be accompanied by braces; this includes - single-line bodies for conditionals and loops. -- Exception checking: - - Throw an exception to report a problem. - - Nothing that calls `abort`, `exit` or `terminate` should - be used. Remember that ngraph is a guest of the framework. - - Do not use exclamation points in messages! - - Be as specific as practical. Keep in mind that the person who - sees the error is likely to be on the other side of the - framework and the message might be the only information they see - about the problem. -- If you use `auto`, know what you are doing. `auto` uses the same - type-stripping rules as template parameters. If something returns a - reference, `auto` will strip the reference unless you use `auto&`: - - Don't do things like - - ``` - auto s = Shape{2,3}; - ``` - - Instead, use - - ``` - Shape s{2, 3}; - ``` - - - Indicate the type in the variable name. 
- -- One variable declaration/definition per line - - Don't use the C-style - - ``` - int x, y, *z; - ``` - - Instead, use: - - ``` - int x; - int y; - int* z; - ``` diff --git a/ngraph/README.md b/ngraph/README.md deleted file mode 100644 index cabc210949e..00000000000 --- a/ngraph/README.md +++ /dev/null @@ -1,109 +0,0 @@ -![nGraph Compiler stack](doc/sphinx/source/graphics/ngraph_header.png) -[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/NervanaSystems/ngraph/blob/master/LICENSE) [![Build Status][build-status-badge]][build-status] - - - -## Quick start - -To begin using nGraph with popular frameworks, please refer to the links below. - -| Framework (Version) | Installation guide | Notes -|----------------------------|----------------------------------------|----------------------------------- -| TensorFlow* | [Pip install](https://www.ngraph.ai/tutorials/tensorflow-tutorial#use-pre-built-packages) or [Build from source](https://www.ngraph.ai/tutorials/tensorflow-tutorial#build-from-source) | 20 [Validated workloads] -| ONNX 1.5 | [Pip install](https://www.ngraph.ai/tutorials/onnx-tutorial#use-pre-built-packages) | 17 [Validated workloads] - - -#### Python wheels for nGraph - -The Python wheels for nGraph have been tested and are supported on the following -64-bit systems: - -* Ubuntu 16.04 or later -* CentOS 7.6 -* Debian 10 -* macOS 10.14.3 (Mojave) - -To install via pip, run: - -``` -pip install --upgrade pip==19.3.1 -pip install ngraph-core -``` - - -Frameworks using nGraph Compiler stack to execute workloads have shown -[**up to 45X**](https://ai.intel.com/ngraph-compiler-stack-beta-release/) -performance boost when compared to native framework implementations. We've also -seen performance boosts running workloads that are not included on the list of -[Validated workloads], thanks to nGraph's powerful subgraph pattern matching. - -Additionally we have integrated nGraph with [PlaidML] to provide deep learning -performance acceleration on Intel, nVidia, & AMD GPUs. More details on current -architecture of the nGraph Compiler stack can be found in [Architecture and features], -and recent changes to the stack are explained in the [Release Notes]. - -## What is nGraph Compiler? - -nGraph Compiler aims to accelerate developing AI workloads using any deep learning -framework and deploying to a variety of hardware targets. We strongly believe in -providing freedom, performance, and ease-of-use to AI developers. - -The diagram below shows deep learning frameworks and hardware targets -supported by nGraph. NNP-T and NNP-I in the diagram refer to Intel's next generation -deep learning accelerators: Intel® Nervanaâ„¢ Neural Network Processor for Training and -Inference respectively. Future plans for supporting addtional deep learning frameworks -and backends are outlined in the [ecosystem] section. - -![](doc/sphinx/source/graphics/nGraph_main.png) - - -Our documentation has extensive information about how to use nGraph Compiler -stack to create an nGraph computational graph, integrate custom frameworks, -and to interact with supported backends. If you wish to contribute to the -project, please don't hesitate to ask questions in [GitHub issues] after -reviewing our contribution guide below. - - -## How to contribute - -We welcome community contributions to nGraph. If you have an idea how -to improve it: - -* See the [contrib guide] for code formatting and style guidelines. -* Share your proposal via [GitHub issues]. 
-* Ensure you can build the product and run all the examples with your patch. -* In the case of a larger feature, create a test. -* Submit a [pull request]. -* Make sure your PR passes all CI tests. Note: You can test locally with `make check`. - - We will review your contribution and, if any additional fixes or modifications are - necessary, may provide feedback to guide you. When accepted, your pull request will - be merged to the repository. - - -[Ecosystem]: ./ecosystem-overview.md -[Architecture and features]: ./ABOUT.md -[Documentation]: https://www.ngraph.ai/documentation -[build the Library]: https://www.ngraph.ai/documentation/buildlb -[Getting Started Guides]: Getting-started-guides -[Validated workloads]: https://www.ngraph.ai/documentation/frameworks/validated/list -[Functional]: https://github.com/NervanaSystems/ngraph-onnx/ -[How to contribute]: How-to-contribute -[framework integration guides]: https://ngraph.ai/documentation/frameworks/overview -[release notes]: https://www.ngraph.ai/documentation/project/release-notes -[Github issues]: https://github.com/NervanaSystems/ngraph/issues -[contrib guide]: https://www.ngraph.ai/documentation/contributing/guide -[pull request]: https://github.com/NervanaSystems/ngraph/pulls -[how to import]: https://www.ngraph.ai/tutorials/onnx-tutorial#import-a-model-with-onnx-and-ngraph -[ngraph_wireframes_with_notice]: doc/sphinx/source/graphics/nGraph_main.png "nGraph components" -[build-status]: https://travis-ci.org/NervanaSystems/ngraph/branches -[build-status-badge]: https://travis-ci.org/NervanaSystems/ngraph.svg?branch=master -[PlaidML]: https://github.com/plaidml/plaidml -[Source compile]: https://github.com/NervanaSystems/ngraph-mxnet/blob/master/README.md -[nGraph-ONNX]: https://github.com/NervanaSystems/ngraph-onnx/blob/master/README.md -[nGraph-ONNX adaptable]: https://ai.intel.com/adaptable-deep-learning-solutions-with-ngraph-compiler-and-onnx/ -[nGraph for PyTorch developers]: https://ai.intel.com/investing-in-the-pytorch-developer-community diff --git a/ngraph/cmake/external_onnx.cmake b/ngraph/cmake/external_onnx.cmake index a4a5c80d1c1..4258f043d4a 100644 --- a/ngraph/cmake/external_onnx.cmake +++ b/ngraph/cmake/external_onnx.cmake @@ -40,6 +40,7 @@ add_definitions(-DONNX_NAMESPACE=${NGRAPH_ONNX_NAMESPACE}) set(CMAKE_CXX_FLAGS ${CMAKE_ORIGINAL_CXX_FLAGS}) if (WIN32) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4251") string(REPLACE "/W3" "/W0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() diff --git a/ngraph/ecosystem-overview.md b/ngraph/ecosystem-overview.md deleted file mode 100644 index aef5be475d2..00000000000 --- a/ngraph/ecosystem-overview.md +++ /dev/null @@ -1,43 +0,0 @@ - -# Framework & runtime support - -One of nGraph’s key features is framework neutrality. We currently support -popular deep learning frameworks such as TensorFlow and MXNet with stable -bridges to pass computational graphs to nGraph. Additionally nGraph -Compiler has a functional bridge to PaddlePaddle. -For these frameworks, we have successfully tested functionality with a few -deep learning workloads, and we plan to bring stable support for them in the -upcoming releases. - -To further promote framework neutrality, the nGraph team has been actively -contributing to the ONNX project. Developers who already have a "trained" -DNN (Deep Neural Network) model can use nGraph to bypass significant -framework-based complexity and [import it] to test or run on targeted and -efficient backends with our user-friendly Python-based API. 
- -nGraph is also integrated as an execution provider for [ONNX Runtime], -which is the first publicly available inference engine for ONNX. - -The table below summarizes our current progress on supported frameworks. -If you are an architect of a framework wishing to take advantage of speed -and multi-device support of nGraph Compiler, please refer to [Framework integration guide] section. - - -| Framework & Runtime | Supported | Validated -|----------------------------|--------------------|------------- -| TensorFlow* 1.12 | :heavy_check_mark: | :heavy_check_mark: -| MXNet* 1.3 | :heavy_check_mark: | :heavy_check_mark: -| ONNX 1.3 | :heavy_check_mark: | :heavy_check_mark: -| ONNX Runtime | Functional | No -| PaddlePaddle | Functional | No - - - - -[Architecture and features]: ./ABOUT.md -[Upcoming DL accelerators]: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/vision-accelerator-design-product-brief.pdf -[import it]: https://ngraph.nervanasys.com/docs/latest/core/constructing-graphs/import.html -[ONNX Runtime]: https://azure.microsoft.com/en-us/blog/onnx-runtime-is-now-open-source/ -[WinML]: http://docs.microsoft.com/en-us/windows/ai -[How to]: https://ngraph.nervanasys.com/docs/latest/howto/index.html -[Framework integration guide]: https://ngraph.nervanasys.com/docs/latest/frameworks/index.html diff --git a/ngraph/python/build_wheel.py.in b/ngraph/python/build_wheel.py.in index 3f70d3c118d..0cba66758ea 100644 --- a/ngraph/python/build_wheel.py.in +++ b/ngraph/python/build_wheel.py.in @@ -66,6 +66,8 @@ try: except subprocess.CalledProcessError as err: print("Could not complete the wheel building process") print("Command that failed: ", err.cmd) - print("Command std output: ", err.stdout.decode('utf-8')) - print("Command err output: ", err.stderr.decode('utf-8')) + if err.stdout is not None: + print("Command std output: ", err.stdout.decode('utf-8')) + if err.stderr is not None: + print("Command err output: ", err.stderr.decode('utf-8')) sys.exit(1) diff --git a/ngraph/python/setup.py b/ngraph/python/setup.py index 7c445ae2bf4..f3bf421c5d8 100644 --- a/ngraph/python/setup.py +++ b/ngraph/python/setup.py @@ -381,7 +381,7 @@ setup( long_description=open(os.path.join(PYNGRAPH_ROOT_DIR, "README.md")).read(), long_description_content_type="text/markdown", ext_modules=ext_modules, - package_dir={"": "src"}, + package_dir={'': PYNGRAPH_SRC_DIR}, packages=packages, cmdclass={"build_ext": BuildExt}, data_files=data_files, diff --git a/ngraph/python/src/ngraph/__init__.py b/ngraph/python/src/ngraph/__init__.py index 96f45b7188d..c94e882dd86 100644 --- a/ngraph/python/src/ngraph/__init__.py +++ b/ngraph/python/src/ngraph/__init__.py @@ -126,7 +126,6 @@ from ngraph.ops import rnn_cell from ngraph.ops import roi_align from ngraph.ops import roi_pooling from ngraph.ops import scatter_elements_update -from ngraph.ops import scatter_nd_update from ngraph.ops import scatter_update from ngraph.ops import select from ngraph.ops import selu diff --git a/ngraph/python/src/ngraph/ops.py b/ngraph/python/src/ngraph/ops.py index 06513940e03..58c0f8970bf 100644 --- a/ngraph/python/src/ngraph/ops.py +++ b/ngraph/python/src/ngraph/ops.py @@ -2631,25 +2631,6 @@ def result(data: NodeInput, name: Optional[str] = None) -> Node: return _get_node_factory().create("Result", [data]) -@nameable_op -def scatter_nd_update( - data: NodeInput, indices: NodeInput, updates: NodeInput, name: str = None -) -> Node: - """Return a node which produces a ScatterNDUpdate operation. 
- - ScatterNDUpdate creates a copy of the first input tensor - with updated elements specified with second and third input tensors. - - :param data: The input tensor to be updated. - :param indices: The tensor with indexes which will be updated. - :param updates: The tensor with update values. - :param name: Optional name for output node. - :return: ScatterNDUpdate node - """ - node_inputs = as_nodes(data, indices, updates) - return _get_node_factory().create("ScatterNDUpdate", node_inputs) - - @nameable_op def scatter_update( data: Node, indices: NodeInput, updates: NodeInput, axis: NodeInput, name: Optional[str] = None diff --git a/ngraph/python/src/ngraph/runtime.py b/ngraph/python/src/ngraph/runtime.py index e11b48f97de..abb6bb8f28a 100644 --- a/ngraph/python/src/ngraph/runtime.py +++ b/ngraph/python/src/ngraph/runtime.py @@ -16,31 +16,42 @@ """Provide a layer of abstraction for the ngraph++ runtime environment.""" import logging from typing import Dict, List, Union +from enum import Enum import numpy as np from ngraph.exceptions import UserInputError -from ngraph.impl import Function, Node, Shape, serialize, util +from ngraph.impl import Function, Node, Shape, PartialShape, serialize, util from ngraph.impl.runtime import Backend, Executable, Tensor from ngraph.utils.types import NumericData, get_dtype log = logging.getLogger(__name__) -def runtime(backend_name: str = "CPU") -> "Runtime": +class BackendMode(Enum): + """DYNAMIC mode enables backend's wrapper which supports dynamic shapes.""" + + STATIC = 0 + DYNAMIC = 1 + + +def runtime(backend_name: str = "CPU", mode: BackendMode = BackendMode.STATIC) -> "Runtime": """Create a Runtime object (helper factory). Use signature to parameterize runtime as needed. """ - return Runtime(backend_name) + return Runtime(backend_name, mode) class Runtime: """Represents the ngraph++ runtime environment.""" - def __init__(self, backend_name: str) -> None: + def __init__(self, backend_name: str, mode: BackendMode = BackendMode.STATIC) -> None: self.backend_name = backend_name - self.backend = Backend.create(backend_name) + if mode == BackendMode.DYNAMIC: + self.backend = Backend.create_dynamic(backend_name) + else: + self.backend = Backend.create(backend_name) def set_config(self, config: Dict[str, str]) -> None: """Set the backend configuration.""" @@ -83,9 +94,15 @@ class Computation(object): self.result_views = [] # type: List[Tensor] for result in self.results: - shape = result.get_shape() element_type = result.get_element_type() - self.result_views.append(runtime.backend.create_tensor(element_type, shape)) + if self.function.is_dynamic(): + output_pshape = result.get_output_partial_shape(0) + output_tensor = runtime.backend.create_dynamic_tensor(element_type, output_pshape) + self.result_views.append(output_tensor) + else: + output_shape = result.get_shape() + output_tensor = runtime.backend.create_tensor(element_type, output_shape) + self.result_views.append(output_tensor) def __repr__(self) -> str: params_string = ", ".join([param.name for param in self.parameters]) @@ -98,7 +115,10 @@ class Computation(object): value = np.array(value) Computation._write_ndarray_to_tensor_view(value, tensor_view) - self.handle.call(self.result_views, self.tensor_views) + if self.function.is_dynamic(): + self.handle.call_with_validate(self.result_views, self.tensor_views) + else: + self.handle.call(self.result_views, self.tensor_views) results = [] for result_view in self.result_views: diff --git a/ngraph/python/src/pyngraph/function.cpp 
b/ngraph/python/src/pyngraph/function.cpp index 4751596fb0b..1c9364b5731 100644 --- a/ngraph/python/src/pyngraph/function.cpp +++ b/ngraph/python/src/pyngraph/function.cpp @@ -41,12 +41,14 @@ void regclass_pyngraph_Function(py::module m) function.def("get_output_op", &ngraph::Function::get_output_op); function.def("get_output_element_type", &ngraph::Function::get_output_element_type); function.def("get_output_shape", &ngraph::Function::get_output_shape); + function.def("get_output_partial_shape", &ngraph::Function::get_output_partial_shape); function.def("get_parameters", &ngraph::Function::get_parameters); function.def("get_results", &ngraph::Function::get_results); function.def("get_result", &ngraph::Function::get_result); function.def("get_unique_name", &ngraph::Function::get_name); function.def("get_name", &ngraph::Function::get_friendly_name); function.def("set_friendly_name", &ngraph::Function::set_friendly_name); + function.def("is_dynamic", &ngraph::Function::is_dynamic); function.def("__repr__", [](const ngraph::Function& self) { std::string class_name = py::cast(self).get_type().attr("__name__").cast(); std::string shape = diff --git a/ngraph/python/src/pyngraph/node.cpp b/ngraph/python/src/pyngraph/node.cpp index a51c8aac529..a8f42c69986 100644 --- a/ngraph/python/src/pyngraph/node.cpp +++ b/ngraph/python/src/pyngraph/node.cpp @@ -74,6 +74,7 @@ void regclass_pyngraph_Node(py::module m) node.def("get_output_element_type", &ngraph::Node::get_output_element_type); node.def("get_element_type", &ngraph::Node::get_element_type); node.def("get_output_shape", &ngraph::Node::get_output_shape); + node.def("get_output_partial_shape", &ngraph::Node::get_output_partial_shape); node.def("get_shape", &ngraph::Node::get_shape); node.def("get_output_partial_shape", &ngraph::Node::get_output_partial_shape); node.def("get_type_name", &ngraph::Node::get_type_name); diff --git a/ngraph/python/src/pyngraph/node_factory.cpp b/ngraph/python/src/pyngraph/node_factory.cpp index 160039a3de4..09597a78eeb 100644 --- a/ngraph/python/src/pyngraph/node_factory.cpp +++ b/ngraph/python/src/pyngraph/node_factory.cpp @@ -46,20 +46,6 @@ namespace { } - virtual void on_attribute(const std::string& name, std::string& value) override - { - if (m_attributes.contains(name)) - { - value = m_attributes[name.c_str()].cast(); - } - } - virtual void on_attribute(const std::string& name, bool& value) override - { - if (m_attributes.contains(name)) - { - value = m_attributes[name.c_str()].cast(); - } - } virtual void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { @@ -69,6 +55,14 @@ namespace false, "No AttributeVisitor support for accessing attribute named: ", name); } } + virtual void on_adapter(const std::string& name, + ngraph::ValueAccessor& adapter) override + { + if (m_attributes.contains(name)) + { + adapter.set(m_attributes[name.c_str()].cast()); + } + } virtual void on_adapter(const std::string& name, ngraph::ValueAccessor& adapter) override { diff --git a/ngraph/python/src/pyngraph/runtime/backend.cpp b/ngraph/python/src/pyngraph/runtime/backend.cpp index 888533bc1af..c2782caea56 100644 --- a/ngraph/python/src/pyngraph/runtime/backend.cpp +++ b/ngraph/python/src/pyngraph/runtime/backend.cpp @@ -23,6 +23,18 @@ namespace py = pybind11; +static std::shared_ptr create_static(const std::string& type) +{ + bool must_support_dynamic = false; + return ngraph::runtime::Backend::create(type, must_support_dynamic); +} + +static std::shared_ptr create_dynamic(const std::string& type) +{ + bool 
must_support_dynamic = true; + return ngraph::runtime::Backend::create(type, must_support_dynamic); +} + static std::shared_ptr compile(ngraph::runtime::Backend* self, std::shared_ptr func) { @@ -30,23 +42,22 @@ static std::shared_ptr compile(ngraph::runtime::Bac return self->compile(func, enable_performance_data); } -static std::shared_ptr create(const std::string& type) -{ - bool must_support_dynamic = false; - return ngraph::runtime::Backend::create(type, must_support_dynamic); -} - void regclass_pyngraph_runtime_Backend(py::module m) { py::class_> backend( m, "Backend"); backend.doc() = "ngraph.impl.runtime.Backend wraps ngraph::runtime::Backend"; - backend.def_static("create", &create); + backend.def_static("create", &create_static); + backend.def_static("create_dynamic", &create_dynamic); backend.def_static("get_registered_devices", &ngraph::runtime::Backend::get_registered_devices); backend.def("create_tensor", (std::shared_ptr(ngraph::runtime::Backend::*)( const ngraph::element::Type&, const ngraph::Shape&)) & ngraph::runtime::Backend::create_tensor); + backend.def("create_dynamic_tensor", + (std::shared_ptr(ngraph::runtime::Backend::*)( + const ngraph::element::Type&, const ngraph::PartialShape&)) & + ngraph::runtime::Backend::create_dynamic_tensor); backend.def("compile", &compile); backend.def("set_config", &ngraph::runtime::Backend::set_config); } diff --git a/ngraph/python/src/pyngraph/runtime/executable.cpp b/ngraph/python/src/pyngraph/runtime/executable.cpp index d557aab9867..09f9f1429ad 100644 --- a/ngraph/python/src/pyngraph/runtime/executable.cpp +++ b/ngraph/python/src/pyngraph/runtime/executable.cpp @@ -33,6 +33,11 @@ void regclass_pyngraph_runtime_Executable(py::module m) const std::vector>&, const std::vector>&)) & ngraph::runtime::Executable::call); + executable.def("call_with_validate", + (bool (ngraph::runtime::Executable::*)( + const std::vector>&, + const std::vector>&)) & + ngraph::runtime::Executable::call_with_validate); executable.def( "get_performance_data", (std::vector(ngraph::runtime::Executable::*)()) & diff --git a/ngraph/python/test/ngraph/test_basic.py b/ngraph/python/test/ngraph/test_basic.py index 487d726432b..e9ca0cd7b43 100644 --- a/ngraph/python/test/ngraph/test_basic.py +++ b/ngraph/python/test/ngraph/test_basic.py @@ -240,9 +240,8 @@ def test_constant_get_data_floating_point(data_type): @pytest.mark.parametrize("data_type", [np.int64, np.int32, np.int16, np.int8]) def test_constant_get_data_signed_integer(data_type): np.random.seed(133391) - input_data = np.random.randint( - np.iinfo(data_type).min, np.iinfo(data_type).max, [2, 3, 4] - ).astype(data_type) + input_data = np.random.randint(np.iinfo(data_type).min, np.iinfo(data_type).max, + size=[2, 3, 4], dtype=data_type) node = ng.constant(input_data, dtype=data_type) retrieved_data = node.get_data() assert np.allclose(input_data, retrieved_data) diff --git a/ngraph/python/test/ngraph/test_ops_reshape.py b/ngraph/python/test/ngraph/test_ops_reshape.py index 7b7182fbb39..362c95937fd 100644 --- a/ngraph/python/test/ngraph/test_ops_reshape.py +++ b/ngraph/python/test/ngraph/test_ops_reshape.py @@ -155,15 +155,15 @@ def test_tile(): def test_strided_slice(): - input_tensor = np.arange(2 * 3 * 4).reshape((2, 3, 4)) - begin = np.array([1, 0]) - end = np.array([0, 0]) - strides = np.array([1, 1]) - begin_mask = np.array([0, 0, 0]) - end_mask = np.array([0, 0, 0]) - new_axis_mask = np.array([0, 1, 0]) - shrink_axis_mask = np.array([1, 0, 0]) - ellipsis_mask = np.array([0, 0, 0]) + input_tensor = 
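The runtime.py changes earlier in this diff add an opt-in dynamic-shape path: BackendMode.DYNAMIC routes Runtime through Backend.create_dynamic, result tensors come from create_dynamic_tensor when the compiled Function reports is_dynamic(), and execution goes through call_with_validate. A hedged usage sketch; the INTERPRETER backend name, ng.add, and the Runtime.computation helper are assumptions about the surrounding ngraph Python API rather than part of this diff:

```python
import numpy as np
import ngraph as ng
from ngraph.runtime import runtime, BackendMode

# Static path (default): Backend.create, create_tensor, Executable.call.
static_rt = runtime(backend_name="INTERPRETER")

# Dynamic path: Backend.create_dynamic; dynamic result tensors and
# call_with_validate are selected automatically when Function.is_dynamic().
dynamic_rt = runtime(backend_name="INTERPRETER", mode=BackendMode.DYNAMIC)

# Both runtimes are driven the same way from Python (computation helper assumed):
a = ng.parameter([2, 2], dtype=np.float32, name="a")
b = ng.parameter([2, 2], dtype=np.float32, name="b")
model = ng.add(a, b)
computation = dynamic_rt.computation(model, a, b)
print(computation(np.ones((2, 2), np.float32), np.ones((2, 2), np.float32)))
```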
np.arange(2 * 3 * 4, dtype=np.float32).reshape((2, 3, 4)) + begin = np.array([1, 0], dtype=np.int64) + end = np.array([0, 0], dtype=np.int64) + strides = np.array([1, 1], dtype=np.int64) + begin_mask = np.array([0, 0, 0], dtype=np.int64) + end_mask = np.array([0, 0, 0], dtype=np.int64) + new_axis_mask = np.array([0, 1, 0], dtype=np.int64) + shrink_axis_mask = np.array([1, 0, 0], dtype=np.int64) + ellipsis_mask = np.array([0, 0, 0], dtype=np.int64) result = run_op_node( [input_tensor, begin, end, strides], @@ -175,7 +175,8 @@ def test_strided_slice(): ellipsis_mask, ) - expected = np.array([12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]).reshape((1, 3, 4)) + expected = np.array([12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], + dtype=np.float32).reshape((1, 3, 4)) assert np.allclose(result, expected) diff --git a/ngraph/python/test/ngraph/test_ops_scatter.py b/ngraph/python/test/ngraph/test_ops_scatter.py index 0a9b10bb6e9..249ec62e196 100644 --- a/ngraph/python/test/ngraph/test_ops_scatter.py +++ b/ngraph/python/test/ngraph/test_ops_scatter.py @@ -18,19 +18,6 @@ import ngraph as ng from ngraph.impl import Type -def test_scatter_nd_update_props(): - dtype = np.int32 - parameter_r = ng.parameter([1000, 256, 10, 15], dtype=dtype, name="data") - parameter_i = ng.parameter([25, 125, 3], dtype=dtype, name="indices") - parameter_u = ng.parameter([25, 125, 15], dtype=dtype, name="updates") - - node = ng.scatter_nd_update(parameter_r, parameter_i, parameter_u) - assert node.get_type_name() == "ScatterNDUpdate" - assert node.get_output_size() == 1 - assert list(node.get_output_shape(0)) == [1000, 256, 10, 15] - assert node.get_output_element_type(0) == Type.i32 - - def test_scatter_update_props(): dtype = np.int8 parameter_r = ng.parameter([2, 3, 4], dtype=dtype, name="data") diff --git a/ngraph/src/ngraph/CMakeLists.txt b/ngraph/src/ngraph/CMakeLists.txt index 19604294f9b..c2977e054a9 100644 --- a/ngraph/src/ngraph/CMakeLists.txt +++ b/ngraph/src/ngraph/CMakeLists.txt @@ -19,6 +19,7 @@ set (SRC assertion.hpp attribute_adapter.cpp attribute_adapter.hpp + attribute_visitor.cpp attribute_visitor.hpp autodiff/adjoints.cpp autodiff/adjoints.hpp @@ -91,6 +92,7 @@ set (SRC except.hpp factory.cpp factory.hpp + factory_adapter.hpp file_util.cpp file_util.hpp function.cpp diff --git a/ngraph/src/ngraph/attribute_adapter.cpp b/ngraph/src/ngraph/attribute_adapter.cpp index c48bd78b7f4..a93bc770093 100644 --- a/ngraph/src/ngraph/attribute_adapter.cpp +++ b/ngraph/src/ngraph/attribute_adapter.cpp @@ -31,388 +31,32 @@ using namespace ngraph; namespace ngraph { - // float constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const double& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const double& value) - { - m_value = value; - m_buffer_valid = false; - } - - // double constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const double& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const double& value) - { - m_value = value; - m_buffer_valid = false; - } - - // bool + constexpr DiscreteTypeInfo AttributeAdapter::type_info; constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const bool& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const bool& value) - { - m_value 
= value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } - #ifdef __APPLE__ // size_t is not uint_64t on OSX constexpr DiscreteTypeInfo AttributeAdapter::type_info; - const int64_t& AttributeAdapter::get() - { - if (!m_buffer_valid) - { - m_buffer = m_value; - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter::set(const int64_t& value) - { - m_value = value; - m_buffer_valid = false; - } -#endif - - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() { return m_value; } - void AttributeAdapter>::set(const vector& value) { m_value = value; } - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() { return m_value; } - void AttributeAdapter>::set(const vector& value) { m_value = value; } - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() { return m_value; } - void AttributeAdapter>::set(const vector& value) { m_value = value; } - // vector - constexpr DiscreteTypeInfo 
AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() { return m_value; } - void AttributeAdapter>::set(const vector& value) { m_value = value; } - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } - - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } - - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } - - // vector - constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } - -#ifdef __APPLE__ - // size_t is not uint64_t on OSX - // vector constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } #endif - - /// vector + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + constexpr DiscreteTypeInfo AttributeAdapter>::type_info; constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } - - /// vector constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } - - /// vector constexpr DiscreteTypeInfo AttributeAdapter>::type_info; - - const vector& AttributeAdapter>::get() - { - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; - } - - void AttributeAdapter>::set(const vector& value) - { - m_value = copy_from>(value); - m_buffer_valid = false; - } } diff --git a/ngraph/src/ngraph/attribute_adapter.hpp 
b/ngraph/src/ngraph/attribute_adapter.hpp index 21d7a8e3f72..e49eed83e80 100644 --- a/ngraph/src/ngraph/attribute_adapter.hpp +++ b/ngraph/src/ngraph/attribute_adapter.hpp @@ -23,278 +23,378 @@ #include "ngraph/enum_names.hpp" #include "ngraph/type.hpp" +/// namespace ngraph { - template + class AttributeVisitor; + + /// \brief Provides access to an attribute of type AT as a value accessor type VAT + template class ValueAccessor; /// \brief ValueAccessor provides an accessor for values that do not have get/set methonds + /// via AttributeVistor.on_adapter. + /// + /// All ValueAccessors must be derived from ValueAccessor so that an AttributeVisitor + /// only needs to implement a subset of the on_adapter methods. template <> class NGRAPH_API ValueAccessor { public: + /// \brief type info enables identification of the value accessor, as well as is_type and + /// as_type. virtual const DiscreteTypeInfo& get_type_info() const = 0; virtual ~ValueAccessor() {} }; - /// \brief Provides access to values via get/set methods - /// \tparam T The type of the value; may be wider than the value being accessed. - template + /// \brief Provides access to values via get/set methods from an m_value, typically from + /// ValueReference + /// + /// The m_buffer holds a VAT, which may be wider than the attribute AT. For example, serializers + /// that only + /// support int64_t integers would use a ValueAccessor> to reference a + /// vector attribute. Destruction moves the value back to the attribute if it was + /// changed. + /// \tparam VAT The adapter value type; may be wider than the value being accessed. + template class ValueAccessor : public ValueAccessor { public: - virtual const DiscreteTypeInfo& get_type_info() const = 0; /// Returns the value - virtual const T& get() = 0; + virtual const VAT& get() = 0; /// Sets the value - virtual void set(const T& value) = 0; + virtual void set(const VAT& value) = 0; + }; + + template <> + class ValueAccessor : public ValueAccessor + { + public: + virtual void* get_ptr() = 0; + virtual size_t size() = 0; + }; + + template + class DirectValueAccessor : public ValueAccessor + { + public: + DirectValueAccessor(AT& ref) + : m_ref(ref) + { + } + const AT& get() override { return m_ref; } + void set(const AT& value) override { m_ref = value; } + protected: + AT& m_ref; + }; + + template + class IndirectScalarValueAccessor : public ValueAccessor + { + public: + IndirectScalarValueAccessor(AT& ref) + : m_ref(ref) + { + } + + const VAT& get() override + { + if (!m_buffer_valid) + { + m_buffer = static_cast(m_ref); + m_buffer_valid = true; + } + return m_buffer; + } + + void set(const VAT& value) override + { + m_ref = static_cast(value); + m_buffer_valid = false; + } protected: - T m_buffer; + AT& m_ref; + VAT m_buffer; bool m_buffer_valid{false}; }; - /// \brief holds a reference to a value - /// \tparam Type the type of the referenced value - template - class ValueReference + template + A copy_from(B& b) + { + A result(b.size()); + for (size_t i = 0; i < b.size(); ++i) + { + result[i] = + static_cast::type>(b[i]); + } + return result; + } + + template + class IndirectVectorValueAccessor : public ValueAccessor { public: - operator Type&() const { return m_value; } - protected: - ValueReference(Type& value) - : m_value(value) + IndirectVectorValueAccessor(AT& ref) + : m_ref(ref) { } - Type& m_value; + + const VAT& get() override + { + if (!m_buffer_valid) + { + m_buffer = copy_from::type>(m_ref); + m_buffer_valid = true; + } + return m_buffer; + } + + void set(const VAT& 
value) override + { + m_ref = copy_from(value); + m_buffer_valid = false; + } + + operator AT&() { return m_ref; } + protected: + AT& m_ref; + VAT m_buffer; + bool m_buffer_valid{false}; }; - template + /// \brief An AttributeAdapter "captures" an attribute as an AT& and makes it available as a + /// ValueAccessor. + template class AttributeAdapter { }; /// \brief Access an enum via a string - /// \tparam Type The enum class - template - class EnumAttributeAdapterBase : public ValueReference, public ValueAccessor + /// \tparam AT The attribute type enum class + template + class EnumAttributeAdapterBase : public ValueAccessor { public: - EnumAttributeAdapterBase(Type& value) - : ValueReference(value) + EnumAttributeAdapterBase(AT& value) + : m_ref(value) { } - const std::string& get() override { return as_string(ValueReference::m_value); } - void set(const std::string& value) override - { - ValueReference::m_value = as_enum(value); - } + const std::string& get() override { return as_string(m_ref); } + void set(const std::string& value) override { m_ref = as_enum(value); } + operator AT&() { return m_ref; } + protected: + AT& m_ref; + }; + + /// Adapters will see visitor + class VisitorAdapter : public ValueAccessor + { + public: + virtual bool visit_attributes(AttributeVisitor& visitor) = 0; }; - /// \brief Access a float as a double template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public IndirectScalarValueAccessor { public: AttributeAdapter(float& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const double& get() override; - void set(const double& value) override; }; /// \brief Access a double as a double template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(double& value) - : ValueReference(value) + : DirectValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const double& get() override; - void set(const double& value) override; + }; + + /// \brief Access a string as a string + template <> + class NGRAPH_API AttributeAdapter : public DirectValueAccessor + { + public: + AttributeAdapter(std::string& value) + : DirectValueAccessor(value) + { + } + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } }; /// \brief Access a bool as a bool template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(bool& value) - : ValueReference(value) + : DirectValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const bool& get() override; - void set(const bool& value) override; }; - /// \brief Access an int8_t and an int16_t + /// \brief Access an int8_t and an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public IndirectScalarValueAccessor { public: 
AttributeAdapter(int8_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; /// \brief Access an int16_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter + : public IndirectScalarValueAccessor { public: AttributeAdapter(int16_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; /// \brief Access an int32_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter + : public IndirectScalarValueAccessor { public: AttributeAdapter(int32_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; /// \brief Access an int64_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public DirectValueAccessor { public: AttributeAdapter(int64_t& value) - : ValueReference(value) + : DirectValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; /// \brief Access a uint8_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter + : public IndirectScalarValueAccessor { public: AttributeAdapter(uint8_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; /// \brief Access a uint16_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter + : public IndirectScalarValueAccessor { public: AttributeAdapter(uint16_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; /// \brief Access a uint32_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter + : public IndirectScalarValueAccessor { public: AttributeAdapter(uint32_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() 
override; - void set(const int64_t& value) override; }; /// \brief Access a uint64_t as an int64_t template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter + : public IndirectScalarValueAccessor { public: AttributeAdapter(uint64_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; }; #ifdef __APPLE__ // size_t is one of the uint types on _WIN32 template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public IndirectScalarValueAccessor { public: AttributeAdapter(size_t& value) - : ValueReference(value) + : IndirectScalarValueAccessor(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const int64_t& get() override; - void set(const int64_t& value) override; + }; + + template <> + class NGRAPH_API AttributeAdapter> + : public IndirectVectorValueAccessor, std::vector> + { + public: + AttributeAdapter(std::vector& value) + : IndirectVectorValueAccessor, std::vector>(value) + { + } + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } }; #endif @@ -304,219 +404,165 @@ namespace ngraph /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void 
set(const std::vector& value) override; }; - /// \brief Access a vector as a vector + /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; - /// \brief Access a vector as a vector + /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; - /// \brief Access a vector as a vector + /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; - /// \brief Access a vector as a vector + /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; -#ifdef __APPLE__ - // size_t is not uint64_t on OSX - template <> - class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> - { - public: - AttributeAdapter(std::vector& value) - : ValueReference>(value) - { - } - - static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; - }; -#endif - /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const 
DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; /// \brief Access a vector template <> class NGRAPH_API AttributeAdapter> - : public ValueReference>, - public ValueAccessor> + : public DirectValueAccessor> { public: AttributeAdapter(std::vector& value) - : ValueReference>(value) + : DirectValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; - - template - A copy_from(B& b) - { - A result(b.size()); - for (int i = 0; i < b.size(); ++i) - { - result[i] = - static_cast::type>(b[i]); - } - return result; - } } diff --git a/ngraph/src/ngraph/attribute_visitor.cpp b/ngraph/src/ngraph/attribute_visitor.cpp new file mode 100644 index 00000000000..b3fc8ccf3b3 --- /dev/null +++ b/ngraph/src/ngraph/attribute_visitor.cpp @@ -0,0 +1,196 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include "ngraph/attribute_visitor.hpp" +#include "ngraph/attribute_adapter.hpp" +#include "ngraph/node.hpp" + +using namespace std; +using namespace ngraph; + +void AttributeVisitor::start_structure(const string& name) +{ + m_context.push_back(name); +} + +string AttributeVisitor::finish_structure() +{ + string result = m_context.back(); + m_context.pop_back(); + return result; +} + +string AttributeVisitor::get_name_with_context() +{ + ostringstream result; + string sep = ""; + for (auto c : m_context) + { + result << sep << c; + sep = "."; + } + return result.str(); +} + +void AttributeVisitor::on_adapter(const std::string& name, VisitorAdapter& adapter) +{ + adapter.visit_attributes(*this); +} + +void AttributeVisitor::on_adapter(const std::string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +}; + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +}; + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + 
on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +void AttributeVisitor::on_adapter(const string& name, ValueAccessor>& adapter) +{ + on_adapter(name, static_cast&>(adapter)); +} + +const AttributeVisitor::node_id_t AttributeVisitor::invalid_node_id = ""; + +void AttributeVisitor::register_node(const std::shared_ptr& node, node_id_t id) +{ + if (id == invalid_node_id) + { + id = node->get_friendly_name(); + } + m_id_node_map[id] = node; + m_node_id_map[node] = id; +} + +std::shared_ptr AttributeVisitor::get_registered_node(node_id_t id) +{ + auto it = m_id_node_map.find(id); + return it == m_id_node_map.end() ? shared_ptr() : it->second; +} + +AttributeVisitor::node_id_t + AttributeVisitor::get_registered_node_id(const std::shared_ptr& node) +{ + auto it = m_node_id_map.find(node); + return it == m_node_id_map.end() ? invalid_node_id : it->second; +} diff --git a/ngraph/src/ngraph/attribute_visitor.hpp b/ngraph/src/ngraph/attribute_visitor.hpp index 9813e1adfc6..b837444bacc 100644 --- a/ngraph/src/ngraph/attribute_visitor.hpp +++ b/ngraph/src/ngraph/attribute_visitor.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include "ngraph/partial_shape.hpp" @@ -27,137 +28,128 @@ namespace ngraph { template class ValueAccessor; + class VisitorAdapter; + class Node; - /// \brief Visits the attributes of a node. + /// \brief Visits the attributes of a node, primarily for serialization-like tasks. /// - /// Attributes are the values set when building a graph which are not - /// computed as the graph executes. 
Values computed from the graph topology and attributes - /// during compilation are not attributes. + /// Attributes are the node parameters that are always compile-time constants. + /// Values computed from the graph topology and attributes during compilation are not + /// attributes. + /// + /// Attributes have a wide variety of types, but serialization formats are more restricted. + /// We assume serialization easily supports scalar types of bool, 64-bit signed integer, string, and double, + /// and has specialized ways to support numeric arrays and raw data+size. The visitor and + /// adapter convert between the limited serialization types and the unlimited attribute types. + /// + /// A visitor is passed to an op's visit_attributes method. The visit_attributes method calls + /// the template method visitor.on_attribute(const std::string& name, AT& value) on each + /// attribute. The visitor can read or write the attribute's value. The on_attribute + /// method creates an AttributeAdapter for the value and passes it to one of the visitor's + /// on_adapter methods. The on_adapter methods expect a reference to a ValueAccessor or a + /// VisitorAdapter. A ValueAccessor has get/set methods that can be used to read/write the + /// attribute value as type VAT. These methods are triggered by deriving AttributeAdapter + /// from ValueAccessor. For more complex cases, such as structs, the on_adapter method for + /// VisitorAdapter passes the name and visitor to the adapter, so that the adapter can perform + /// additional work such as visiting struct members or sequence values. + /// + /// When a node visits an attribute with structure, the node's on_attribute passes a name for + /// the entire attribute, but the struct will have its own methods to be visited. Similarly, a + /// vector will have a sequence of members to be visited. The adapter may use the visitor + /// methods start_struct/finish_struct and start_vector/next_vector/finish_vector to indicate + /// nested members. + /// + /// The visitor method get_name_with_context creates a generic nested version of the name. + /// Visitors can override according to their serialization requirements. + /// + /// Attributes that are shared_ptr<Node> are special. They must already have been + /// registered with the visitor using register_node, which needs a shared pointer to a node and + /// a string ID. The ID string will be used to serialize the node or find the node during + /// deserialization. class NGRAPH_API AttributeVisitor { public: virtual ~AttributeVisitor() {} // Must implement these methods - virtual void on_attribute(const std::string& name, std::string& value) = 0; - virtual void on_attribute(const std::string& name, bool& value) = 0; - virtual void on_attribute(const std::string& name, void* data, size_t size) {} + /// \brief handles all specialized on_adapter methods implemented by the visitor. 
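As a concrete illustration of the adapter mechanism described above, the following minimal sketch shows how a user-defined enum attribute can be exposed to visitors as a string. It is illustrative only and not part of this patch; the FillMode enum and its string names are hypothetical, while EnumNames, EnumAttributeAdapterBase and DiscreteTypeInfo are the types touched by this diff.

#include "ngraph/attribute_adapter.hpp"
#include "ngraph/enum_names.hpp"

enum class FillMode { ZERO, EDGE };

namespace ngraph
{
    // Register the string <-> enum mapping used by as_string/as_enum.
    template <>
    EnumNames<FillMode>& EnumNames<FillMode>::get()
    {
        static auto enum_names =
            EnumNames<FillMode>("FillMode", {{"zero", FillMode::ZERO}, {"edge", FillMode::EDGE}});
        return enum_names;
    }

    // The adapter presents the enum to any AttributeVisitor as a ValueAccessor<std::string>.
    template <>
    class AttributeAdapter<FillMode> : public EnumAttributeAdapterBase<FillMode>
    {
    public:
        AttributeAdapter(FillMode& value)
            : EnumAttributeAdapterBase<FillMode>(value)
        {
        }
        static constexpr DiscreteTypeInfo type_info{"AttributeAdapter<FillMode>", 0};
        const DiscreteTypeInfo& get_type_info() const override { return type_info; }
    };
}

constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter<FillMode>::type_info;

With such a specialization in place, a hypothetical op member FillMode m_fill_mode can be visited with visitor.on_attribute("fill_mode", m_fill_mode), and string-based serializers read or write it as "zero" or "edge".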
+ /// + /// The adapter implements get_type_info(), which can be used to determine the adapter + /// directly + /// or via is_type and as_type on any platform virtual void on_adapter(const std::string& name, ValueAccessor& adapter) = 0; // The remaining adapter methods fall back on the void adapter if not implemented - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - }; - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); + virtual void on_adapter(const std::string& name, ValueAccessor& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& 
adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } - virtual void on_adapter(const std::string& name, ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) - { - on_adapter(name, static_cast&>(adapter)); - } + ValueAccessor>& adapter); virtual void on_adapter(const std::string& name, - ValueAccessor>& adapter) + ValueAccessor>& adapter); + virtual void on_adapter(const std::string& name, + ValueAccessor>& adapter); + /// \brief Hook for adapters that need visitor access + virtual void on_adapter(const std::string& name, VisitorAdapter& adapter); + + /// The generic visitor. There must be a definition of AttributeAdapter that can convert + /// to a ValueAccessor for one of the on_adpater methods. + template + void on_attribute(const std::string& name, AT& value) { - on_adapter(name, static_cast&>(adapter)); - } - // Use an adapter for non-primitive types - template - // typename std::enable_if::value, void>::type - void on_attribute(const std::string& name, T& value) - { - AttributeAdapter adapter(value); - on_adapter(name, adapter); - } - void on_attribute(const std::string& name, op::AutoBroadcastSpec& value) - { - AttributeAdapter adapter(value.m_type); - on_adapter(name, adapter); - } - void on_attribute(const std::string& name, op::BroadcastModeSpec& value) - { - AttributeAdapter adapter(value.m_type); - on_adapter(name, adapter); + AttributeAdapter adapter(value); + start_structure(name); + on_adapter(get_name_with_context(), adapter); + finish_structure(); } + /// \returns The nested context of visits + const std::vector& get_context() const { return m_context; } + /// \returns context prepended to names + virtual std::string get_name_with_context(); + /// \brief Start visiting a nested structure + virtual void start_structure(const std::string& name); + /// \brief Finish visiting a nested structure + virtual std::string finish_structure(); + using node_id_t = std::string; + static const node_id_t invalid_node_id; + /// \brief Associate a node with an id. + /// + /// No node may be used as an attribute unless it has already been registered with an ID. + /// References to nodes are visited with a ValueAccessor of their ID. 
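To make the on_attribute/on_adapter flow described above concrete, here is a minimal sketch of a visitor together with an attribute owner that drives it. It is illustrative only and not part of this patch; PrintVisitor and ExampleAttrs are hypothetical names, and a real op would derive from ngraph::Node and override Node::visit_attributes instead.

#include <cstdint>
#include <iostream>
#include <string>

#include "ngraph/attribute_adapter.hpp"
#include "ngraph/attribute_visitor.hpp"

class PrintVisitor : public ngraph::AttributeVisitor
{
public:
    // Fallback handler; every on_adapter overload not overridden below ends up here.
    void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& /*adapter*/) override
    {
        std::cout << name << ": <unhandled adapter type>\n";
    }
    void on_adapter(const std::string& name, ngraph::ValueAccessor<int64_t>& adapter) override
    {
        std::cout << name << ": " << adapter.get() << "\n";
    }
    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& adapter) override
    {
        std::cout << name << ": " << adapter.get() << "\n";
    }
};

struct ExampleAttrs
{
    int64_t axis{1};
    std::string mode{"linear"};

    bool visit_attributes(ngraph::AttributeVisitor& visitor)
    {
        visitor.on_attribute("axis", axis); // dispatched via AttributeAdapter<int64_t>
        visitor.on_attribute("mode", mode); // dispatched via AttributeAdapter<std::string>
        return true;
    }
};

int main()
{
    ExampleAttrs attrs;
    PrintVisitor visitor;
    attrs.visit_attributes(visitor); // prints "axis: 1" and "mode: linear"
}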
+ virtual void register_node(const std::shared_ptr& node, + node_id_t id = invalid_node_id); + /// Returns the node with the given id, or nullptr if there is no registered node + virtual std::shared_ptr get_registered_node(node_id_t id); + /// Returns the id for the node, or -1 if the node is not registered + virtual node_id_t get_registered_node_id(const std::shared_ptr& node); + + protected: + std::vector m_context; + std::unordered_map, node_id_t> m_node_id_map; + std::unordered_map> m_id_node_map; }; } diff --git a/ngraph/src/ngraph/axis_set.cpp b/ngraph/src/ngraph/axis_set.cpp index 5ffac23bbfc..4978518d8df 100644 --- a/ngraph/src/ngraph/axis_set.cpp +++ b/ngraph/src/ngraph/axis_set.cpp @@ -71,20 +71,22 @@ const std::vector& ngraph::AttributeAdapter::get() { if (!m_buffer_valid) { - for (auto elt : m_value) + m_buffer.clear(); + for (auto elt : m_ref) { m_buffer.push_back(elt); } + m_buffer_valid = true; } return m_buffer; } void ngraph::AttributeAdapter::set(const std::vector& value) { - m_value = AxisSet(); + m_ref = AxisSet(); for (auto elt : value) { - m_value.insert(elt); + m_ref.insert(elt); } m_buffer_valid = false; } diff --git a/ngraph/src/ngraph/axis_set.hpp b/ngraph/src/ngraph/axis_set.hpp index 59b8706f6fc..aad365f8190 100644 --- a/ngraph/src/ngraph/axis_set.hpp +++ b/ngraph/src/ngraph/axis_set.hpp @@ -48,19 +48,22 @@ namespace ngraph }; template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor> + class NGRAPH_API AttributeAdapter : public ValueAccessor> { public: AttributeAdapter(AxisSet& value) - : ValueReference(value) + : m_ref(value) { } - static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } const std::vector& get() override; void set(const std::vector& value) override; + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + AxisSet& m_ref; + std::vector m_buffer; + bool m_buffer_valid{false}; }; NGRAPH_API diff --git a/ngraph/src/ngraph/axis_vector.cpp b/ngraph/src/ngraph/axis_vector.cpp index 434c0b975d4..c41b2f7a3a1 100644 --- a/ngraph/src/ngraph/axis_vector.cpp +++ b/ngraph/src/ngraph/axis_vector.cpp @@ -64,3 +64,5 @@ ngraph::AxisVector& ngraph::AxisVector::operator=(AxisVector&& v) noexcept static_cast*>(this)->operator=(v); return *this; } + +constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter::type_info; diff --git a/ngraph/src/ngraph/axis_vector.hpp b/ngraph/src/ngraph/axis_vector.hpp index 1607dd1c4e0..0347a0f44f1 100644 --- a/ngraph/src/ngraph/axis_vector.hpp +++ b/ngraph/src/ngraph/axis_vector.hpp @@ -20,6 +20,7 @@ #include #include +#include "ngraph/attribute_adapter.hpp" #include "ngraph/ngraph_visibility.hpp" namespace ngraph @@ -51,6 +52,20 @@ namespace ngraph NGRAPH_API AxisVector& operator=(AxisVector&& v) noexcept; }; + template <> + class NGRAPH_API AttributeAdapter + : public IndirectVectorValueAccessor> + { + public: + AttributeAdapter(AxisVector& value) + : IndirectVectorValueAccessor>(value) + { + } + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + }; + NGRAPH_API std::ostream& operator<<(std::ostream& s, const AxisVector& axis_vector); } diff --git a/ngraph/src/ngraph/builder/reshape.cpp b/ngraph/src/ngraph/builder/reshape.cpp index e5d6ab4db9c..0ae89840062 100644 --- 
a/ngraph/src/ngraph/builder/reshape.cpp +++ b/ngraph/src/ngraph/builder/reshape.cpp @@ -258,58 +258,41 @@ namespace ngraph shared_ptr builder::opset1::flatten(const Output& value, int axis) { - if (value.get_partial_shape().is_static()) + // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of + // input tensor. The last dimension is the product of the rest of input tensor dimensions: + // [d_{axis}, ..., d_n] + shared_ptr output_shape; + if (axis == 0) { - auto data_shape = value.get_shape(); - // First dimension of output tensor is the product of [d_0, ... d_{axis-1}] dimensions of - // input - // tensor. The last dimension is the product of the rest of input tensor dimensions: - // [d_{axis}, ..., d_n] - size_t first_dim_size = - accumulate(begin(data_shape), next(begin(data_shape), axis), 1UL, multiplies()); - - size_t last_dim_size = - accumulate(next(begin(data_shape), axis), end(data_shape), 1UL, multiplies()); - - return builder::opset1::reshape(value, Shape{first_dim_size, last_dim_size}); + output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {1, -1}); + } + else if (axis == 1) + { + output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {0, -1}); } else { - shared_ptr output_shape; - if (axis == 0) - { - output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {1, -1}); - } - else if (axis == 1) - { - output_shape = ngraph::opset1::Constant::create(element::i64, Shape{2}, {0, -1}); - } - else - { - const auto value_shape = make_shared(value); - const auto value_rank = make_shared(value_shape); - const auto axis_node = get_normalized_axis_node(value_rank, axis); - const auto remaining_part_length = - ngraph::opset1::Constant::create(element::i64, Shape{1}, {-1}); - const auto shape_split_lengths = make_shared( - OutputVector{axis_node, remaining_part_length}, 0); - const auto split_parts = make_shared( - value_shape, - ngraph::opset1::Constant::create(element::i64, Shape{}, {0}), - shape_split_lengths); - // We're reducing vectors thus, just single zero axis to reduce and keep dims to true. 
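The shape arithmetic described in the flatten comment above can be sketched for the static-shape case in plain C++. This mirrors the accumulate-based code this hunk removes; flatten_shape is a hypothetical helper, not part of the patch.

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Output shape is {d_0 * ... * d_{axis-1}, d_axis * ... * d_n}; an empty leading
// range (axis == 0) yields 1 for the first dimension.
std::vector<std::size_t> flatten_shape(const std::vector<std::size_t>& data_shape, std::size_t axis)
{
    const std::size_t first_dim = std::accumulate(data_shape.begin(),
                                                  data_shape.begin() + axis,
                                                  std::size_t{1},
                                                  std::multiplies<std::size_t>());
    const std::size_t last_dim = std::accumulate(data_shape.begin() + axis,
                                                 data_shape.end(),
                                                 std::size_t{1},
                                                 std::multiplies<std::size_t>());
    return {first_dim, last_dim};
}

// flatten_shape({2, 3, 4, 5}, 2) == {6, 20}; flatten_shape({2, 3, 4, 5}, 0) == {1, 120}.
// The dynamic-shape path added in this hunk produces the same result via Reshape with the
// special patterns {1, -1} (axis == 0) and {0, -1} (axis == 1), or a computed target shape
// otherwise.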
- const auto first_part_dim = make_shared( - split_parts->get_output_as_single_output_node(0), - ngraph::opset1::Constant::create(element::i64, Shape{}, {0}), - true); - // TODO, handle edge case where first part is empty - then should equal to one - output_shape = make_shared( - OutputVector{first_part_dim, remaining_part_length}, 0); - } + const auto value_shape = make_shared(value); + const auto value_rank = make_shared(value_shape); + const auto axis_node = get_normalized_axis_node(value_rank, axis); - return make_shared(value, output_shape, true) - ->add_provenance_group_members_above({value}); + const auto first_part_dims = make_shared( + value_shape, + ngraph::opset1::Constant::create(element::i64, {1}, {0}), + axis_node, + vector{}, + vector{}); + const auto first_part_dims_length = make_shared( + first_part_dims, ngraph::opset1::Constant::create(element::i64, {}, {0}), true); + + const auto remaining_part_length = + ngraph::opset1::Constant::create(element::i64, {1}, {-1}); + + output_shape = make_shared( + OutputVector{first_part_dims_length, remaining_part_length}, 0); } + return make_shared(value, output_shape, true) + ->add_provenance_group_members_above({value}); } shared_ptr builder::opset1::expand_dims(const Output& value, size_t axis) diff --git a/ngraph/src/ngraph/coordinate.cpp b/ngraph/src/ngraph/coordinate.cpp index f04f8b30574..ca14ced153f 100644 --- a/ngraph/src/ngraph/coordinate.cpp +++ b/ngraph/src/ngraph/coordinate.cpp @@ -73,20 +73,4 @@ ngraph::Coordinate& ngraph::Coordinate::operator=(Coordinate&& v) noexcept return *this; } -const vector& AttributeAdapter::get() -{ - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; -} - -void AttributeAdapter::set(const vector& value) -{ - m_value = copy_from(m_value); - m_buffer_valid = false; -} - constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter::type_info; diff --git a/ngraph/src/ngraph/coordinate.hpp b/ngraph/src/ngraph/coordinate.hpp index c5476c820a0..c2b2f1b269a 100644 --- a/ngraph/src/ngraph/coordinate.hpp +++ b/ngraph/src/ngraph/coordinate.hpp @@ -54,19 +54,17 @@ namespace ngraph }; template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor> + class NGRAPH_API AttributeAdapter + : public IndirectVectorValueAccessor> { public: AttributeAdapter(Coordinate& value) - : ValueReference(value) + : IndirectVectorValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; NGRAPH_API diff --git a/ngraph/src/ngraph/coordinate_diff.cpp b/ngraph/src/ngraph/coordinate_diff.cpp index c7aa14e438c..4865dd99c16 100644 --- a/ngraph/src/ngraph/coordinate_diff.cpp +++ b/ngraph/src/ngraph/coordinate_diff.cpp @@ -68,20 +68,4 @@ ngraph::CoordinateDiff& ngraph::CoordinateDiff::operator=(CoordinateDiff&& v) no return *this; } -const vector& AttributeAdapter::get() -{ - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; -} - -void AttributeAdapter::set(const vector& value) -{ - m_value = copy_from(value); - m_buffer_valid = false; -} - constexpr ngraph::DiscreteTypeInfo ngraph::AttributeAdapter::type_info; diff --git a/ngraph/src/ngraph/coordinate_diff.hpp b/ngraph/src/ngraph/coordinate_diff.hpp index c49a00c3314..db172564868 100644 --- a/ngraph/src/ngraph/coordinate_diff.hpp +++ 
b/ngraph/src/ngraph/coordinate_diff.hpp @@ -53,19 +53,18 @@ namespace ngraph }; template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor> + class NGRAPH_API AttributeAdapter + : public IndirectVectorValueAccessor> + { public: AttributeAdapter(CoordinateDiff& value) - : ValueReference(value) + : IndirectVectorValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; NGRAPH_API diff --git a/ngraph/src/ngraph/factory_adapter.hpp b/ngraph/src/ngraph/factory_adapter.hpp new file mode 100644 index 00000000000..e57dac90fce --- /dev/null +++ b/ngraph/src/ngraph/factory_adapter.hpp @@ -0,0 +1,71 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +#include "ngraph/attribute_adapter.hpp" +#include "ngraph/attribute_visitor.hpp" +#include "ngraph/factory.hpp" + +namespace ngraph +{ + template + class FactoryAttributeAdapter : public VisitorAdapter + { + public: + FactoryAttributeAdapter(std::shared_ptr& ref) + : m_ref(ref) + { + } + + /// \brief Hook for extra processing before other attributes + virtual bool on_start(AttributeVisitor& /* visitor */) { return true; } + /// \brief Hook for extra processing after other attributes + virtual bool on_finish(AttributeVisitor& /* visitor */) { return true; } + bool visit_attributes(AttributeVisitor& visitor) override + { + if (on_start(visitor)) + { + std::string type_info_name; + uint64_t type_info_version; + if (m_ref) + { + auto& type_info = m_ref->get_type_info(); + type_info_name = type_info.name; + type_info_version = type_info.version; + } + visitor.on_attribute("name", type_info_name); + visitor.on_attribute("version", type_info_version); + if (!type_info_name.empty() && !m_ref) + { + m_ref = std::shared_ptr(FactoryRegistry::get().create( + DiscreteTypeInfo{type_info_name.c_str(), type_info_version})); + } + if (m_ref) + { + visitor.start_structure("value"); + m_ref->visit_attributes(visitor); + visitor.finish_structure(); + } + on_finish(visitor); + } + return true; + } + + protected: + std::shared_ptr& m_ref; + }; +} \ No newline at end of file diff --git a/ngraph/src/ngraph/frontend/onnx_import/CMakeLists.txt b/ngraph/src/ngraph/frontend/onnx_import/CMakeLists.txt index ec44743b07a..20782def75e 100644 --- a/ngraph/src/ngraph/frontend/onnx_import/CMakeLists.txt +++ b/ngraph/src/ngraph/frontend/onnx_import/CMakeLists.txt @@ -247,16 +247,18 @@ add_library(onnx_importer SHARED set(ONNX_IMPORT_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE INTERNAL "") -target_link_libraries(onnx_importer PRIVATE ngraph onnx onnx_proto) +target_link_libraries(onnx_importer PRIVATE onnx onnx_proto) 
+target_link_libraries(onnx_importer PUBLIC ngraph) set_target_properties(onnx_importer PROPERTIES CXX_VISIBILITY_PRESET hidden C_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN ON POSITION_INDEPENDENT_CODE ON) -target_include_directories(onnx_importer SYSTEM PUBLIC ${ONNX_IMPORT_INCLUDE_DIR}) +target_include_directories(onnx_importer SYSTEM PUBLIC $ + $) target_include_directories(onnx_importer SYSTEM PRIVATE ${NGRAPH_INCLUDE_PATH} - SYSTEM PRIVATE ${ONNX_INCLUDE_DIR} ${ONNX_PROTO_INCLUDE_DIR} ${Protobuf_INCLUDE_DIR}) + ${ONNX_INCLUDE_DIR} ${ONNX_PROTO_INCLUDE_DIR} ${Protobuf_INCLUDE_DIR}) target_compile_definitions(onnx_importer PRIVATE ONNX_OPSET_VERSION=${ONNX_OPSET_VERSION}) @@ -268,4 +270,7 @@ endif() target_compile_definitions(onnx_importer PRIVATE ONNX_IMPORTER_DLL_EXPORTS) -install(TARGETS onnx_importer DESTINATION ${NGRAPH_INSTALL_LIB}) +install(TARGETS onnx_importer EXPORT ngraphTargets + RUNTIME DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph + ARCHIVE DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph + LIBRARY DESTINATION ${NGRAPH_INSTALL_LIB} COMPONENT ngraph) diff --git a/ngraph/src/ngraph/frontend/onnx_import/op/hardmax.cpp b/ngraph/src/ngraph/frontend/onnx_import/op/hardmax.cpp index 4eb8a550ae9..7c0ed5240e4 100644 --- a/ngraph/src/ngraph/frontend/onnx_import/op/hardmax.cpp +++ b/ngraph/src/ngraph/frontend/onnx_import/op/hardmax.cpp @@ -22,6 +22,7 @@ #include "ngraph/opsets/opset0.hpp" #include "ngraph/validation_util.hpp" #include "utils/common.hpp" +#include "utils/reshape.hpp" namespace ngraph { @@ -35,16 +36,25 @@ namespace ngraph { const auto input = node.get_ng_inputs().at(0); const auto& input_shape = input->get_output_partial_shape(0); - const auto axis = node.get_attribute_value("axis", 1); - const auto normalized_axis = - ngraph::normalize_axis(node.get_description(), axis, input_shape.rank()); + auto axis = node.get_attribute_value("axis", 1); + if (input_shape.rank().is_static()) + { + axis = ngraph::normalize_axis( + node.get_description(), axis, input_shape.rank()); + } // reshape to 2D - "batch size" x "input feature dimensions" (NxD) - const auto coerced_tensor = - ngraph::builder::opset1::flatten(input, normalized_axis); - const auto& coerced_shape = coerced_tensor->get_shape(); - const auto row_size = static_cast(coerced_shape.at(1)); + const auto coerced_tensor = ngraph::builder::opset1::flatten(input, axis); + + const auto coerced_tensor_shape = + std::make_shared(coerced_tensor); + std::shared_ptr row_size = + std::make_shared( + coerced_tensor_shape, + default_opset::Constant::create(element::i64, {1}, {1}), + default_opset::Constant::create(element::i64, {}, {0})); + row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size); const auto indices_axis = 1; const auto max_indices = std::make_shared( @@ -56,15 +66,13 @@ namespace ngraph default_opset::TopK::SortType::NONE), 1); - const auto depth = - default_opset::Constant::create(ngraph::element::i64, Shape{}, {row_size}); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); const auto results = std::make_shared( - max_indices, depth, on_value, off_value, indices_axis); + max_indices, row_size, on_value, off_value, indices_axis); const auto converted_results = std::make_shared( results, input->get_element_type()); diff --git a/ngraph/src/ngraph/frontend/onnx_import/op/resize.cpp b/ngraph/src/ngraph/frontend/onnx_import/op/resize.cpp index 
3bf384d3796..ca055cbb258 100644 --- a/ngraph/src/ngraph/frontend/onnx_import/op/resize.cpp +++ b/ngraph/src/ngraph/frontend/onnx_import/op/resize.cpp @@ -70,9 +70,10 @@ namespace ngraph axes.insert(ax); } - auto attrs = default_opset::Interpolate::InterpolateAttrs(); + auto attrs = ngraph::op::v0::InterpolateAttrs(); attrs.axes = axes; - attrs.mode = as_enum(mode); + attrs.mode = mode; + attrs.align_corners = false; if (scales->is_constant() && data_shape.is_static()) { diff --git a/ngraph/src/ngraph/frontend/onnx_import/ops_bridge.cpp b/ngraph/src/ngraph/frontend/onnx_import/ops_bridge.cpp index f9cefa6b4b8..695958ad096 100644 --- a/ngraph/src/ngraph/frontend/onnx_import/ops_bridge.cpp +++ b/ngraph/src/ngraph/frontend/onnx_import/ops_bridge.cpp @@ -191,7 +191,7 @@ namespace ngraph auto dm = m_map.find(domain); if (dm == std::end(m_map)) { - NGRAPH_WARN << "Domain '" << domain << "' not recognized by nGraph"; + NGRAPH_DEBUG << "Domain '" << domain << "' not recognized by nGraph"; return OperatorSet{}; } if (domain == "" && version > OperatorsBridge::LATEST_SUPPORTED_ONNX_OPSET_VERSION) diff --git a/ngraph/src/ngraph/lambda.cpp b/ngraph/src/ngraph/lambda.cpp index 49f66dc9b08..34f774da41e 100644 --- a/ngraph/src/ngraph/lambda.cpp +++ b/ngraph/src/ngraph/lambda.cpp @@ -15,7 +15,9 @@ //***************************************************************************** #include "ngraph/lambda.hpp" -#include "validation_util.hpp" +#include "ngraph/factory_adapter.hpp" +#include "ngraph/graph_util.hpp" +#include "ngraph/validation_util.hpp" using namespace std; using namespace ngraph; @@ -94,3 +96,274 @@ bool Lambda::evaluate(const HostTensorVector& output_tensors, const HostTensorVe evaluate_nodes(value_map, output_tensor_map, outputs); return true; } + +bool Lambda::visit_attributes(AttributeVisitor& visitor) +{ + visitor.on_attribute("parameters", m_parameters); + visitor.on_attribute("results", m_results); + return true; +} + +constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + +AttributeAdapter>::AttributeAdapter(shared_ptr& ref) + : m_ref(ref) +{ +} + +class NodeAttributeAdapter : public FactoryAttributeAdapter +{ +public: + using FactoryAttributeAdapter::FactoryAttributeAdapter; + bool on_start(AttributeVisitor& visitor) override + { + // Indicate that there is a node following + m_id = visitor.get_registered_node_id(m_ref); + m_set_id = (m_ref == nullptr); + visitor.on_attribute("id", m_id); + return m_ref == nullptr || m_id != AttributeVisitor::invalid_node_id; + } + bool on_finish(AttributeVisitor&) override + { + if (m_set_id && m_ref) + { + m_ref->set_friendly_name(m_id); + } + return true; + } + void visit(AttributeVisitor& visitor, const std::string& id) + { + visitor.start_structure(id); + visitor.on_adapter(id, *this); + visitor.finish_structure(); + } + static constexpr DiscreteTypeInfo type_info{"Lambda.NodeAttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + string m_id; + bool m_set_id; +}; + +constexpr DiscreteTypeInfo NodeAttributeAdapter::type_info; + +bool AttributeAdapter>::visit_attributes(AttributeVisitor& visitor) +{ + if (m_ref->get_results().size() > 0) + { + NodeVector serialized_nodes; + { + // Start with all nodes not already serialized + visitor.start_structure("nodes"); + NodeVector results; + for (auto result : m_ref->get_results()) + { + results.push_back(result); + } + + int64_t i = 0; + ostringstream index; + traverse_nodes( + results, [&i, &index, &visitor, &serialized_nodes](shared_ptr node) -> void { + 
if (AttributeVisitor::invalid_node_id == visitor.get_registered_node_id(node)) + { + // This node hasn't been seen before + visitor.register_node(node); + index.str(""); + index << i++; + string id = index.str(); + NodeAttributeAdapter adapter(node); + adapter.visit(visitor, id); + serialized_nodes.push_back(node); + } + }); + { + // Sentinel at end + index.str(""); + index << i++; + string id = index.str(); + shared_ptr null_node; + NodeAttributeAdapter adapter(null_node); + adapter.visit(visitor, id); + } + visitor.finish_structure(); + } + { + // Now do all the edges + visitor.start_structure("edges"); + int64_t i = 0; + ostringstream index; + for (auto node : serialized_nodes) + { + for (auto input : node->inputs()) + { + index.str(""); + index << i++; + string id = index.str(); + visitor.start_structure(id); + string input_node_id = visitor.get_registered_node_id(node); + uint64_t input_index = input.get_index(); + visitor.on_attribute("input_node", input_node_id); + visitor.on_attribute("input_index", input_index); + auto output = input.get_source_output(); + string output_node_id = + visitor.get_registered_node_id(output.get_node_shared_ptr()); + uint64_t output_index = output.get_index(); + visitor.on_attribute("output_node", output_node_id); + visitor.on_attribute("output_index", output_index); + visitor.finish_structure(); + } + } + { + // Add a sentinel + index.str(""); + index << i++; + string id = index.str(); + visitor.start_structure(id); + string input_node_id = AttributeVisitor::invalid_node_id; + visitor.on_attribute("input_node", input_node_id); + visitor.finish_structure(); + } + visitor.finish_structure(); + } + { + // Control dependencies + visitor.start_structure("control"); + int64_t i = 0; + ostringstream index; + for (auto node : serialized_nodes) + { + for (auto control : node->get_control_dependencies()) + { + index.str(""); + index << i++; + string id = index.str(); + visitor.start_structure(id); + string node_id = visitor.get_registered_node_id(node); + string dependency_id = visitor.get_registered_node_id(control); + visitor.on_attribute("node", node_id); + visitor.on_attribute("dependency", dependency_id); + visitor.finish_structure(); + } + } + { + // Add a sentinel + index.str(""); + index << i++; + string id = index.str(); + visitor.start_structure(id); + string node_id = AttributeVisitor::invalid_node_id; + visitor.on_attribute("node", node_id); + visitor.finish_structure(); + } + visitor.finish_structure(); + } + } + else + { + NodeVector deserialized_nodes; + { + // Read the graph + visitor.start_structure("nodes"); + int64_t i = 0; + ostringstream index; + while (true) + { + index.str(""); + index << i++; + string id = index.str(); + shared_ptr node; + NodeAttributeAdapter adapter(node); + adapter.visit(visitor, id); + if (node) + { + visitor.register_node(node); + deserialized_nodes.push_back(node); + } + else + { + break; + } + } + visitor.finish_structure(); + } + { + visitor.start_structure("edges"); + // Connect the nodes + int64_t i = 0; + ostringstream index; + bool more_edges = true; + while (more_edges) + { + index.str(""); + index << i++; + string id = index.str(); + visitor.start_structure(id); + string input_node_id; + visitor.on_attribute("input_node", input_node_id); + if (!input_node_id.empty()) + { + shared_ptr input_node = visitor.get_registered_node(input_node_id); + NGRAPH_CHECK(input_node, "input node of edge not known"); + uint64_t input_index; + string output_node_id; + uint64_t output_index; + visitor.on_attribute("input_index", 
input_index); + visitor.on_attribute("output_node", output_node_id); + visitor.on_attribute("output_index", output_index); + shared_ptr output_node = visitor.get_registered_node(output_node_id); + NGRAPH_CHECK(output_node, "output_node of edge not known"); + input_node->set_argument(input_index, output_node->output(output_index)); + } + else + { + more_edges = false; + } + visitor.finish_structure(); + } + visitor.finish_structure(); + } + { + // Control dependencies + visitor.start_structure("control"); + int64_t i = 0; + ostringstream index; + bool more_control = true; + while (more_control) + { + index.str(""); + index << i++; + string id = index.str(); + visitor.start_structure(id); + string node_id; + visitor.on_attribute("node", node_id); + if (!node_id.empty()) + { + shared_ptr node = visitor.get_registered_node(node_id); + NGRAPH_CHECK(node, "node of control edge not known"); + string dependency_id; + visitor.on_attribute("dependency", dependency_id); + shared_ptr dependency = visitor.get_registered_node(dependency_id); + NGRAPH_CHECK(dependency, "dependency of control edge not known"); + node->add_control_dependency(dependency); + } + else + { + more_control = false; + } + visitor.finish_structure(); + } + visitor.finish_structure(); + } + for (auto node : topological_sort(deserialized_nodes)) + { + node->validate_and_infer_types(); + } + } + + { + // Finally visit the object attributes + visitor.start_structure("value"); + m_ref->visit_attributes(visitor); + visitor.finish_structure(); + } + return true; +} diff --git a/ngraph/src/ngraph/lambda.hpp b/ngraph/src/ngraph/lambda.hpp index 10c37362bf3..317e273bec3 100644 --- a/ngraph/src/ngraph/lambda.hpp +++ b/ngraph/src/ngraph/lambda.hpp @@ -26,9 +26,11 @@ namespace ngraph class NGRAPH_API Lambda { public: + virtual ~Lambda() {} static constexpr DiscreteTypeInfo type_info{"Lamdba", 0}; const DiscreteTypeInfo& get_type_info() const { return type_info; } /// Return the function parameters + virtual bool visit_attributes(AttributeVisitor& visitor); const ParameterVector& get_parameters() const { return m_parameters; }; /// Index for parameter, or -1 int64_t get_parameter_index(const std::shared_ptr& parameter) const; @@ -43,10 +45,25 @@ namespace ngraph const HostTensorVector& input_tensors); protected: + Lambda() = default; Lambda(const ResultVector& results, const ParameterVector& parameters); Lambda(const OutputVector& results, const ParameterVector& parameters); ResultVector m_results; ParameterVector m_parameters; }; + + template <> + class NGRAPH_API AttributeAdapter> : public VisitorAdapter + { + public: + AttributeAdapter(std::shared_ptr& ref); + + bool visit_attributes(AttributeVisitor& visitor) override; + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + std::shared_ptr& m_ref; + }; } diff --git a/ngraph/src/ngraph/node.cpp b/ngraph/src/ngraph/node.cpp index 7f55a128d65..150618f4906 100644 --- a/ngraph/src/ngraph/node.cpp +++ b/ngraph/src/ngraph/node.cpp @@ -1144,3 +1144,56 @@ bool Node::constant_fold(OutputVector& output_values, const OutputVector& input_ } return false; } + +constexpr DiscreteTypeInfo AttributeAdapter>::type_info; + +AttributeAdapter>::AttributeAdapter(std::shared_ptr& value) + : m_ref(value) +{ +} + +bool AttributeAdapter>::visit_attributes(AttributeVisitor& visitor) +{ + auto original_id = visitor.get_registered_node_id(m_ref); + auto id = original_id; + visitor.on_attribute("ID", id); + if 
(id != original_id) + { + m_ref = visitor.get_registered_node(id); + } + return true; +} + +constexpr DiscreteTypeInfo AttributeAdapter::type_info; + +AttributeAdapter::AttributeAdapter(NodeVector& ref) + : m_ref(ref) +{ +} + +bool AttributeAdapter::visit_attributes(AttributeVisitor& visitor) +{ + int64_t size = m_ref.size(); + visitor.on_attribute("size", size); + if (size != m_ref.size()) + { + m_ref.resize(size); + } + ostringstream index; + for (int64_t i = 0; i < size; i++) + { + index.str(""); + index << i; + string id; + if (m_ref[i]) + { + id = visitor.get_registered_node_id(m_ref[i]); + } + visitor.on_attribute(index.str(), id); + if (!m_ref[i]) + { + m_ref[i] = visitor.get_registered_node(id); + } + } + return true; +} diff --git a/ngraph/src/ngraph/node.hpp b/ngraph/src/ngraph/node.hpp index 46031b45c10..e2bcea960c9 100644 --- a/ngraph/src/ngraph/node.hpp +++ b/ngraph/src/ngraph/node.hpp @@ -637,6 +637,34 @@ namespace ngraph bool operator>=(const RawNodeOutput& other) const { return !(*this < other); } }; + /// \brief Visits a reference to a node that has been registered with the visitor. + template <> + class NGRAPH_API AttributeAdapter> : public VisitorAdapter + { + public: + AttributeAdapter(std::shared_ptr& value); + + bool visit_attributes(AttributeVisitor& visitor) override; + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter>", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + std::shared_ptr& m_ref; + }; + + template <> + class NGRAPH_API AttributeAdapter : public VisitorAdapter + { + public: + AttributeAdapter(NodeVector& ref); + + bool visit_attributes(AttributeVisitor& visitor) override; + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + NodeVector& m_ref; + }; + using RawNodeOutputMap = std::map>; class NGRAPH_API NodeValidationFailure : public CheckFailure diff --git a/ngraph/src/ngraph/op/constant.cpp b/ngraph/src/ngraph/op/constant.cpp index 24addf48593..33dd1f9facc 100644 --- a/ngraph/src/ngraph/op/constant.cpp +++ b/ngraph/src/ngraph/op/constant.cpp @@ -338,26 +338,26 @@ string op::Constant::convert_value_to_string(size_t index) const #endif switch (get_element_type()) { - case element::Type_t::boolean: rc = to_string(get_vector()[index]); break; + case element::Type_t::boolean: rc = to_string(get_data_ptr()[index]); break; case element::Type_t::bf16: - rc = to_cpp_string(static_cast(get_vector()[index])); + rc = to_cpp_string(static_cast(get_data_ptr()[index])); break; case element::Type_t::f16: - rc = to_cpp_string(static_cast(get_vector()[index])); + rc = to_cpp_string(static_cast(get_data_ptr()[index])); break; - case element::Type_t::f32: rc = to_cpp_string(get_vector()[index]); break; - case element::Type_t::f64: rc = to_cpp_string(get_vector()[index]); break; - case element::Type_t::i8: rc = to_string(get_vector()[index]); break; - case element::Type_t::i16: rc = to_string(get_vector()[index]); break; - case element::Type_t::i32: rc = to_string(get_vector()[index]); break; - case element::Type_t::i64: rc = to_string(get_vector()[index]); break; + case element::Type_t::f32: rc = to_cpp_string(get_data_ptr()[index]); break; + case element::Type_t::f64: rc = to_cpp_string(get_data_ptr()[index]); break; + case element::Type_t::i8: rc = to_string(get_data_ptr()[index]); break; + case element::Type_t::i16: rc = to_string(get_data_ptr()[index]); break; + case element::Type_t::i32: rc = 
to_string(get_data_ptr()[index]); break; + case element::Type_t::i64: rc = to_string(get_data_ptr()[index]); break; case element::Type_t::u1: - rc = to_string((get_vector()[index / 8] >> (7 - (index % 8))) & 1); + rc = to_string((get_data_ptr()[index / 8] >> (7 - (index % 8))) & 1); break; - case element::Type_t::u8: rc = to_string(get_vector()[index]); break; - case element::Type_t::u16: rc = to_string(get_vector()[index]); break; - case element::Type_t::u32: rc = to_string(get_vector()[index]); break; - case element::Type_t::u64: rc = to_string(get_vector()[index]); break; + case element::Type_t::u8: rc = to_string(get_data_ptr()[index]); break; + case element::Type_t::u16: rc = to_string(get_data_ptr()[index]); break; + case element::Type_t::u32: rc = to_string(get_data_ptr()[index]); break; + case element::Type_t::u64: rc = to_string(get_data_ptr()[index]); break; case element::Type_t::undefined: throw runtime_error("unsupported type"); case element::Type_t::dynamic: throw runtime_error("unsupported type"); } @@ -623,7 +623,7 @@ bool op::v0::Constant::visit_attributes(AttributeVisitor& visitor) // Filling in a fresh constant allocate_buffer(); } - visitor.on_attribute("value", get_data_ptr_nc(), shape_size(m_shape) * m_element_type.size()); + visitor.on_attribute("value", m_data); return true; } diff --git a/ngraph/src/ngraph/op/constant.hpp b/ngraph/src/ngraph/op/constant.hpp index 23cb06eb42f..cb7c6939931 100644 --- a/ngraph/src/ngraph/op/constant.hpp +++ b/ngraph/src/ngraph/op/constant.hpp @@ -323,18 +323,8 @@ namespace ngraph template std::vector get_vector() const { - if (sizeof(T) > m_element_type.size() && shape_size(m_shape) > 0) - { - throw ngraph_error("Buffer over-read"); - } - - std::vector rc; - const T* p = static_cast(get_data_ptr()); - for (size_t i = 0; i < shape_size(m_shape); i++) - { - rc.push_back(p[i]); - } - return rc; + const T* p = get_data_ptr(); + return std::vector(p, p + shape_size(m_shape)); } /// \brief Return the Constant's value as a vector cast to type T @@ -435,6 +425,11 @@ namespace ngraph template const T* get_data_ptr() const { + if (sizeof(T) > m_element_type.size() && shape_size(m_shape) > 0) + { + throw ngraph_error("Buffer over-read"); + } + return static_cast(get_data_ptr()); } diff --git a/ngraph/src/ngraph/op/fused/shuffle_channels.cpp b/ngraph/src/ngraph/op/fused/shuffle_channels.cpp index cceab1c9bd5..34d345f5a40 100644 --- a/ngraph/src/ngraph/op/fused/shuffle_channels.cpp +++ b/ngraph/src/ngraph/op/fused/shuffle_channels.cpp @@ -23,10 +23,12 @@ using namespace ngraph; constexpr NodeTypeInfo op::ShuffleChannels::type_info; -op::ShuffleChannels::ShuffleChannels(const Output& data, const int axis, const size_t groups) +op::ShuffleChannels::ShuffleChannels(const Output& data, + const int64_t axis, + const int64_t group) : FusedOp({data}) , m_axis(axis) - , m_groups{groups} + , m_group{group} { constructor_validate_and_infer_types(); } @@ -34,7 +36,7 @@ op::ShuffleChannels::ShuffleChannels(const Output& data, const int axis, c bool ngraph::op::v0::ShuffleChannels::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("axis", m_axis); - visitor.on_attribute("groups", m_groups); + visitor.on_attribute("group", m_group); return true; } @@ -72,10 +74,13 @@ void op::ShuffleChannels::pre_validate_and_infer_types() "The 'axis' parameter for ShuffleChannels has to point to one of the " "input tensor's shape dimensions."); + NODE_VALIDATION_CHECK( + this, m_group >= 1, "The 'group' parameter must be greater or equal to 1."); + const auto 
channel_dim_size = shape.at(axis_zb); NODE_VALIDATION_CHECK( this, - channel_dim_size % m_groups == 0, + channel_dim_size % m_group == 0, "The channel dimension size has to be a multiple of the groups parameter value."); } } @@ -99,7 +104,7 @@ shared_ptr op::ShuffleChannels::clone_with_new_inputs(const OutputVector& std::to_string(new_args.size())); } - return make_shared(new_args.at(0), m_axis, m_groups); + return make_shared(new_args.at(0), m_axis, m_group); } Shape op::ShuffleChannels::get_pre_shuffle_shape(const Shape& data_shape) const @@ -108,8 +113,8 @@ Shape op::ShuffleChannels::get_pre_shuffle_shape(const Shape& data_shape) const // in general the resulting shape should contain the following values: // [0]: ds[0] * ds[1] * ... * ds[m_axis-1] (or 1 if m_axis == 0) - // [1]: m_groups - // [2]: ds[axis] / m_groups + // [1]: m_group + // [2]: ds[axis] / m_group // [3]: ds[axis+1] * ds[axis+2] * ... * ds[ds.size()-1] (or 1 if m_axis points to the last elem // of ds) Shape res(4, 1); @@ -120,8 +125,8 @@ Shape op::ShuffleChannels::get_pre_shuffle_shape(const Shape& data_shape) const res[0] *= ds[i]; } - res[1] = m_groups; - res[2] = ds[axis_zb] / m_groups; + res[1] = m_group; + res[2] = ds[axis_zb] / m_group; for (size_t i = axis_zb + 1; i < ds.size(); ++i) { diff --git a/ngraph/src/ngraph/op/fused/shuffle_channels.hpp b/ngraph/src/ngraph/op/fused/shuffle_channels.hpp index 47c281de310..33995993810 100644 --- a/ngraph/src/ngraph/op/fused/shuffle_channels.hpp +++ b/ngraph/src/ngraph/op/fused/shuffle_channels.hpp @@ -41,12 +41,12 @@ namespace ngraph /// that the index should be calculated from the back of the input /// data /// shape. - /// \param groups - number of groups the channel dimension specified by axis should + /// \param group - number of group the channel dimension specified by axis should /// be /// split into ShuffleChannels(const Output& data, - const int axis = 1, - const size_t groups = 1UL); + const int64_t axis = 1, + const int64_t group = 1); bool visit_attributes(AttributeVisitor& visitor) override; size_t get_zero_based_axis() const; @@ -58,8 +58,8 @@ namespace ngraph virtual std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - int get_axis() const { return m_axis; } - size_t get_groups() const { return m_groups; } + int64_t get_axis() const { return m_axis; } + int64_t get_group() const { return m_group; } private: /// \brief Generates a shape required to permute the data /// @@ -67,8 +67,8 @@ namespace ngraph /// \return A 4D tensor to be used to reshape the input data before shuffling it Shape get_pre_shuffle_shape(const Shape& data_shape) const; - int m_axis; - size_t m_groups; + int64_t m_axis; + int64_t m_group; }; } using v0::ShuffleChannels; diff --git a/ngraph/src/ngraph/op/parameter.cpp b/ngraph/src/ngraph/op/parameter.cpp index fba520da27d..1594e873d58 100644 --- a/ngraph/src/ngraph/op/parameter.cpp +++ b/ngraph/src/ngraph/op/parameter.cpp @@ -70,3 +70,37 @@ void op::Parameter::set_is_relevant_to_shapes(bool is_relevant) { m_is_relevant_to_shapes = is_relevant; } + +constexpr DiscreteTypeInfo AttributeAdapter::type_info; + +AttributeAdapter::AttributeAdapter(ParameterVector& ref) + : m_ref(ref) +{ +} + +bool AttributeAdapter::visit_attributes(AttributeVisitor& visitor) +{ + int64_t size = m_ref.size(); + visitor.on_attribute("size", size); + if (size != m_ref.size()) + { + m_ref.resize(size); + } + ostringstream index; + for (int64_t i = 0; i < size; i++) + { + index.str(""); + index << i; + string id; + if (m_ref[i]) + { + 
id = visitor.get_registered_node_id(m_ref[i]); + } + visitor.on_attribute(index.str(), id); + if (!m_ref[i]) + { + m_ref[i] = as_type_ptr(visitor.get_registered_node(id)); + } + } + return true; +} diff --git a/ngraph/src/ngraph/op/parameter.hpp b/ngraph/src/ngraph/op/parameter.hpp index 5c942499b6c..dbfd627f080 100644 --- a/ngraph/src/ngraph/op/parameter.hpp +++ b/ngraph/src/ngraph/op/parameter.hpp @@ -84,4 +84,18 @@ namespace ngraph using v0::Parameter; } using ParameterVector = std::vector>; + + template <> + class NGRAPH_API AttributeAdapter : public VisitorAdapter + { + public: + AttributeAdapter(ParameterVector& ref); + + bool visit_attributes(AttributeVisitor& visitor) override; + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + ParameterVector& m_ref; + }; } diff --git a/ngraph/src/ngraph/op/reshape.cpp b/ngraph/src/ngraph/op/reshape.cpp index e089f85ff9d..5c330cdcb6b 100644 --- a/ngraph/src/ngraph/op/reshape.cpp +++ b/ngraph/src/ngraph/op/reshape.cpp @@ -183,6 +183,13 @@ shared_ptr op::Reshape::clone_with_new_inputs(const OutputVector& new_args return make_shared(new_args.at(0), m_input_order, m_output_shape); } +bool op::Reshape::visit_attributes(AttributeVisitor& visitor) +{ + visitor.on_attribute("input_order", m_input_order); + visitor.on_attribute("output_shape", m_output_shape); + return true; +} + void op::Reshape::generate_adjoints(autodiff::Adjoints& adjoints, const OutputVector& deltas) { auto delta = deltas.at(0); @@ -244,6 +251,7 @@ void op::v1::Reshape::validate_and_infer_types() this, pattern_et.is_integral_number(), "Pattern must be an integral number."); // check shapes + const PartialShape& input_pshape = get_input_partial_shape(0); const PartialShape& pattern_shape = get_input_partial_shape(1); NODE_VALIDATION_CHECK(this, pattern_shape.rank().compatible(1), @@ -290,22 +298,33 @@ void op::v1::Reshape::validate_and_infer_types() else { std::vector partial_shape(output_rank.get_length()); - // Replace zeros and negatives with Dynamic dimensions as needed - std::transform(out_shape_val.begin(), - out_shape_val.end(), - partial_shape.begin(), - [&](const int64_t& v) { - return (v < 0) ? Dimension() - : ((v == 0 && m_special_zero) ? Dimension() - : Dimension(v)); - }); + // Replace zeros with Dynamic dimensions as needed + for (size_t i = 0; i < out_shape_val.size(); ++i) + { + const auto& v = out_shape_val[i]; + if (v < 0) + { + partial_shape[i] = Dimension(); + } + else if (v == 0 && m_special_zero) + { + partial_shape[i] = ((input_pshape.rank().is_static() && + input_pshape.rank().get_length() == out_shape_val.size()) + ? 
input_pshape[i] + : Dimension()); + } + else + { + partial_shape[i] = Dimension(v); + } + } - if (get_input_partial_shape(0).is_static()) + if (input_pshape.is_static()) { size_t output_elements = 1; int negative_dim = -1; - auto input_shape = get_input_partial_shape(0).to_shape(); + auto input_shape = input_pshape.to_shape(); size_t input_elements = shape_size(input_shape); for (size_t i = 0; i < output_rank.get_length(); i++) { diff --git a/ngraph/src/ngraph/op/reshape.hpp b/ngraph/src/ngraph/op/reshape.hpp index fdd827cca29..ef6d4350f3a 100644 --- a/ngraph/src/ngraph/op/reshape.hpp +++ b/ngraph/src/ngraph/op/reshape.hpp @@ -91,6 +91,7 @@ namespace ngraph virtual std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + bool visit_attributes(AttributeVisitor& visitor) override; /// \return The order in which to iterate over input axes. const AxisVector& get_input_order() const { return m_input_order; } diff --git a/ngraph/src/ngraph/op/result.cpp b/ngraph/src/ngraph/op/result.cpp index 7d4bbe61de4..5f4d492f06a 100644 --- a/ngraph/src/ngraph/op/result.cpp +++ b/ngraph/src/ngraph/op/result.cpp @@ -75,3 +75,37 @@ bool op::Result::constant_fold(OutputVector& output_values, const OutputVector& { return false; } + +constexpr DiscreteTypeInfo AttributeAdapter::type_info; + +AttributeAdapter::AttributeAdapter(ResultVector& ref) + : m_ref(ref) +{ +} + +bool AttributeAdapter::visit_attributes(AttributeVisitor& visitor) +{ + int64_t size = m_ref.size(); + visitor.on_attribute("size", size); + if (size != m_ref.size()) + { + m_ref.resize(size); + } + ostringstream index; + for (int64_t i = 0; i < size; i++) + { + index.str(""); + index << i; + string id; + if (m_ref[i]) + { + id = visitor.get_registered_node_id(m_ref[i]); + } + visitor.on_attribute(index.str(), id); + if (!m_ref[i]) + { + m_ref[i] = as_type_ptr(visitor.get_registered_node(id)); + } + } + return true; +} diff --git a/ngraph/src/ngraph/op/result.hpp b/ngraph/src/ngraph/op/result.hpp index c8ec098cb0e..63954183bf9 100644 --- a/ngraph/src/ngraph/op/result.hpp +++ b/ngraph/src/ngraph/op/result.hpp @@ -64,4 +64,18 @@ namespace ngraph using v0::Result; } using ResultVector = std::vector>; + + template <> + class NGRAPH_API AttributeAdapter : public VisitorAdapter + { + public: + AttributeAdapter(ResultVector& ref); + + bool visit_attributes(AttributeVisitor& visitor) override; + + static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + ResultVector& m_ref; + }; } diff --git a/ngraph/src/ngraph/op/split.cpp b/ngraph/src/ngraph/op/split.cpp index f894b0aff21..4aedd3d3241 100644 --- a/ngraph/src/ngraph/op/split.cpp +++ b/ngraph/src/ngraph/op/split.cpp @@ -53,7 +53,7 @@ void op::v0::Split::pre_validate_and_infer_types() const auto axis_node = input_value(1).get_node_shared_ptr(); NODE_VALIDATION_CHECK(this, axis_node->is_constant(), "The 'axis' input node must be constant"); const auto axis_node_const = as_type_ptr(axis_node); - m_axis = axis_node_const->cast_vector()[0]; + m_axis = axis_node_const->get_data_ptr()[0]; // Create dynamic-typed outputs. 
Actual shape/type will be computed during shape inference for (size_t i = 0; i < std::max(m_splits.size(), m_num_split); i++) diff --git a/ngraph/src/ngraph/op/tensor_iterator.cpp b/ngraph/src/ngraph/op/tensor_iterator.cpp index 0ed8269eec4..87325b62ec8 100644 --- a/ngraph/src/ngraph/op/tensor_iterator.cpp +++ b/ngraph/src/ngraph/op/tensor_iterator.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include "ngraph/op/tensor_iterator.hpp" +#include "ngraph/factory.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/pass/get_output_element_elimination.hpp" #include "ngraph/specialize_function.hpp" @@ -22,93 +23,135 @@ using namespace std; using namespace ngraph; -constexpr NodeTypeInfo op::TensorIterator::type_info; +constexpr NodeTypeInfo op::v0::TensorIterator::type_info; -constexpr DiscreteTypeInfo op::TensorIterator::SliceInputDescription::type_info; -constexpr DiscreteTypeInfo op::TensorIterator::MergedInputDescription::type_info; -constexpr DiscreteTypeInfo op::TensorIterator::InvariantInputDescription::type_info; +constexpr DiscreteTypeInfo op::v0::TensorIterator::SliceInputDescription::type_info; +constexpr DiscreteTypeInfo op::v0::TensorIterator::MergedInputDescription::type_info; +constexpr DiscreteTypeInfo op::v0::TensorIterator::InvariantInputDescription::type_info; -constexpr DiscreteTypeInfo op::TensorIterator::BodyOutputDescription::type_info; -constexpr DiscreteTypeInfo op::TensorIterator::ConcatOutputDescription::type_info; +constexpr DiscreteTypeInfo op::v0::TensorIterator::BodyOutputDescription::type_info; +constexpr DiscreteTypeInfo op::v0::TensorIterator::ConcatOutputDescription::type_info; -constexpr DiscreteTypeInfo op::TensorIterator::BodyLambda::type_info; +constexpr DiscreteTypeInfo op::v0::TensorIterator::BodyLambda::type_info; -op::TensorIterator::TensorIterator(const OutputVector& values) +bool op::v0::TensorIterator::BodyLambda::visit_attributes(AttributeVisitor& visitor) +{ + return true; +} + +op::v0::TensorIterator::TensorIterator(const OutputVector& values) : op::util::FusedOp(values) { } -op::TensorIterator::InputDescription::InputDescription(uint64_t input_index, - uint64_t body_parameter_index) +op::v0::TensorIterator::InputDescription::InputDescription(uint64_t input_index, + uint64_t body_parameter_index) : m_input_index(input_index) , m_body_parameter_index(body_parameter_index) { } -op::TensorIterator::SliceInputDescription::SliceInputDescription(uint64_t input_index, - uint64_t body_parameter_index, - int64_t start, - int64_t stride, - int64_t part_size, - int64_t end, - int64_t axis) - : InputDescription(input_index, body_parameter_index) - , m_start(start) - , m_stride(stride) - , m_part_size(part_size) - , m_end(end) - , m_axis(axis) +bool op::v0::TensorIterator::InputDescription::visit_attributes(AttributeVisitor& visitor) { + visitor.on_attribute("input_index", m_input_index); + visitor.on_attribute("body_parameter_index", m_body_parameter_index); + return true; } -shared_ptr - op::TensorIterator::SliceInputDescription::copy() const -{ - return make_shared( - m_input_index, m_body_parameter_index, m_start, m_stride, m_part_size, m_end, m_axis); -} - -op::TensorIterator::MergedInputDescription::MergedInputDescription(uint64_t input_index, - uint64_t body_parameter_index, - uint64_t body_value_index) - : InputDescription(input_index, body_parameter_index) - , m_body_value_index(body_value_index) -{ -} - -shared_ptr - op::TensorIterator::MergedInputDescription::copy() const -{ - return 
make_shared( - m_input_index, m_body_parameter_index, m_body_value_index); -} - -op::TensorIterator::InvariantInputDescription::InvariantInputDescription( - uint64_t input_index, uint64_t body_parameter_index) - : InputDescription(input_index, body_parameter_index) -{ -} - -shared_ptr - op::TensorIterator::InvariantInputDescription::copy() const -{ - return make_shared(m_input_index, m_body_parameter_index); -} - -op::TensorIterator::OutputDescription::OutputDescription(uint64_t body_value_index, - uint64_t output_index) - : m_body_value_index(body_value_index) - , m_output_index(output_index) -{ -} - -op::TensorIterator::ConcatOutputDescription::ConcatOutputDescription(uint64_t body_value_index, - uint64_t output_index, +op::v0::TensorIterator::SliceInputDescription::SliceInputDescription(uint64_t input_index, + uint64_t body_parameter_index, int64_t start, int64_t stride, int64_t part_size, int64_t end, int64_t axis) + : InputDescription(input_index, body_parameter_index) + , m_start(start) + , m_stride(stride) + , m_part_size(part_size) + , m_end(end) + , m_axis(axis) +{ +} + +shared_ptr + op::v0::TensorIterator::SliceInputDescription::copy() const +{ + return make_shared( + m_input_index, m_body_parameter_index, m_start, m_stride, m_part_size, m_end, m_axis); +} + +bool op::v0::TensorIterator::SliceInputDescription::visit_attributes(AttributeVisitor& visitor) +{ + InputDescription::visit_attributes(visitor); + visitor.on_attribute("start", m_start); + visitor.on_attribute("stride", m_stride); + visitor.on_attribute("part_size", m_part_size); + visitor.on_attribute("end", m_end); + visitor.on_attribute("axis", m_axis); + return true; +} + +op::v0::TensorIterator::MergedInputDescription::MergedInputDescription( + uint64_t input_index, uint64_t body_parameter_index, uint64_t body_value_index) + : InputDescription(input_index, body_parameter_index) + , m_body_value_index(body_value_index) +{ +} + +shared_ptr + op::v0::TensorIterator::MergedInputDescription::copy() const +{ + return make_shared( + m_input_index, m_body_parameter_index, m_body_value_index); +} + +bool op::v0::TensorIterator::MergedInputDescription::visit_attributes(AttributeVisitor& visitor) +{ + InputDescription::visit_attributes(visitor); + visitor.on_attribute("body_value_index", m_body_value_index); + return true; +} + +op::v0::TensorIterator::InvariantInputDescription::InvariantInputDescription( + uint64_t input_index, uint64_t body_parameter_index) + : InputDescription(input_index, body_parameter_index) +{ +} + +shared_ptr + op::v0::TensorIterator::InvariantInputDescription::copy() const +{ + return make_shared(m_input_index, m_body_parameter_index); +} + +bool op::v0::TensorIterator::InvariantInputDescription::visit_attributes(AttributeVisitor& visitor) +{ + InputDescription::visit_attributes(visitor); + return true; +} + +op::v0::TensorIterator::OutputDescription::OutputDescription(uint64_t body_value_index, + uint64_t output_index) + : m_body_value_index(body_value_index) + , m_output_index(output_index) +{ +} + +bool op::v0::TensorIterator::OutputDescription::visit_attributes(AttributeVisitor& visitor) +{ + visitor.on_attribute("body_value_index", m_body_value_index); + visitor.on_attribute("output_index", m_output_index); + return true; +} + +op::v0::TensorIterator::ConcatOutputDescription::ConcatOutputDescription(uint64_t body_value_index, + uint64_t output_index, + int64_t start, + int64_t stride, + int64_t part_size, + int64_t end, + int64_t axis) : OutputDescription(body_value_index, output_index) , 
m_start(start) , m_stride(stride) @@ -118,41 +161,180 @@ op::TensorIterator::ConcatOutputDescription::ConcatOutputDescription(uint64_t bo { } -shared_ptr - op::TensorIterator::ConcatOutputDescription::copy() const +bool op::v0::TensorIterator::ConcatOutputDescription::visit_attributes(AttributeVisitor& visitor) +{ + OutputDescription::visit_attributes(visitor); + visitor.on_attribute("start", m_start); + visitor.on_attribute("stride", m_stride); + visitor.on_attribute("part_size", m_part_size); + visitor.on_attribute("end", m_end); + visitor.on_attribute("axis", m_axis); + return true; +} + +shared_ptr + op::v0::TensorIterator::ConcatOutputDescription::copy() const { return make_shared( m_body_value_index, m_output_index, m_start, m_stride, m_part_size, m_end, m_axis); } -op::TensorIterator::BodyOutputDescription::BodyOutputDescription(uint64_t body_value_index, - uint64_t output_index, - int64_t iteration) +op::v0::TensorIterator::BodyOutputDescription::BodyOutputDescription(uint64_t body_value_index, + uint64_t output_index, + int64_t iteration) : OutputDescription(body_value_index, output_index) , m_iteration(iteration) { } -shared_ptr - op::TensorIterator::BodyOutputDescription::copy() const +shared_ptr + op::v0::TensorIterator::BodyOutputDescription::copy() const { return make_shared(m_body_value_index, m_output_index, m_iteration); } -Input op::TensorIterator::input_for_value(const Output& value) +bool op::v0::TensorIterator::BodyOutputDescription::visit_attributes(AttributeVisitor& visitor) +{ + OutputDescription::visit_attributes(visitor); + visitor.on_attribute("iteration", m_iteration); + return true; +} + +namespace +{ +} + +namespace ngraph +{ + template <> + FactoryRegistry& + FactoryRegistry::get() + { + static FactoryRegistry registry; + static mutex init_guard; + if (registry.m_factory_map.size() == 0) + { + lock_guard guard(init_guard); + if (registry.m_factory_map.size() == 0) + { + registry.register_factory(); + registry.register_factory(); + registry.register_factory(); + } + } + return registry; + } + + constexpr DiscreteTypeInfo + AttributeAdapter>::type_info; + + constexpr DiscreteTypeInfo AttributeAdapter< + std::vector>>::type_info; + + AttributeAdapter>>:: + AttributeAdapter(std::vector>& ref) + : m_ref(ref) + { + } + + bool AttributeAdapter>>:: + visit_attributes(AttributeVisitor& visitor) + { + int64_t size = m_ref.size(); + visitor.on_attribute("size", size); + if (size != m_ref.size()) + { + m_ref.resize(size); + } + ostringstream index; + for (int64_t i = 0; i < size; i++) + { + index.str(""); + index << i; + visitor.on_attribute(index.str(), m_ref[i]); + } + return true; + } + + template <> + FactoryRegistry& + FactoryRegistry::get() + { + static FactoryRegistry registry; + static mutex init_guard; + // TODO: Add a lock + if (registry.m_factory_map.size() == 0) + { + lock_guard guard(init_guard); + if (registry.m_factory_map.size() == 0) + { + registry.register_factory(); + registry.register_factory(); + } + } + return registry; + } + + constexpr DiscreteTypeInfo AttributeAdapter< + std::vector>>::type_info; + + constexpr DiscreteTypeInfo + AttributeAdapter>::type_info; + + AttributeAdapter>>:: + AttributeAdapter(std::vector>& ref) + : m_ref(ref) + { + } + + bool AttributeAdapter>>:: + visit_attributes(AttributeVisitor& visitor) + { + int64_t size = m_ref.size(); + visitor.on_attribute("size", size); + if (size != m_ref.size()) + { + m_ref.resize(size); + } + ostringstream index; + for (int64_t i = 0; i < size; i++) + { + index.str(""); + index << i; + 
visitor.on_attribute(index.str(), m_ref[i]); + } + return true; + } +} + +bool op::v0::TensorIterator::visit_attributes(AttributeVisitor& visitor) +{ + if (!m_body) + { + m_body = make_shared(); + } + shared_ptr lambda = m_body; + visitor.on_attribute("body", lambda); + visitor.on_attribute("input_descriptions", m_input_descriptions); + visitor.on_attribute("output_descriptions", m_output_descriptions); + + return false; +} + +Input op::v0::TensorIterator::input_for_value(const Output& value) { auto input_index = get_input_size(); set_argument(input_index, value); return Input(this, input_index); } -void op::TensorIterator::set_sliced_input(const std::shared_ptr& body_parameter, - const Output& value, - int64_t start, - int64_t stride, - int64_t part_size, - int64_t end, - int64_t axis) +void op::v0::TensorIterator::set_sliced_input(const std::shared_ptr& body_parameter, + const Output& value, + int64_t start, + int64_t stride, + int64_t part_size, + int64_t end, + int64_t axis) { m_input_descriptions.push_back( make_shared(input_for_value(value).get_index(), @@ -164,9 +346,9 @@ void op::TensorIterator::set_sliced_input(const std::shared_ptr& axis)); } -void op::TensorIterator::set_merged_input(const std::shared_ptr& body_parameter, - const Output& initial_value, - const Output& successive_value) +void op::v0::TensorIterator::set_merged_input(const std::shared_ptr& body_parameter, + const Output& initial_value, + const Output& successive_value) { m_input_descriptions.push_back( make_shared(input_for_value(initial_value).get_index(), @@ -174,14 +356,15 @@ void op::TensorIterator::set_merged_input(const std::shared_ptr& body m_body->get_result_index(successive_value))); } -void op::TensorIterator::set_invariant_input(const std::shared_ptr& body_parameter, - const Output& value) +void op::v0::TensorIterator::set_invariant_input(const std::shared_ptr& body_parameter, + const Output& value) { m_input_descriptions.push_back(make_shared( input_for_value(value).get_index(), m_body->get_parameter_index(body_parameter))); } -Output op::TensorIterator::get_iter_value(const Output& body_value, int64_t iteration) +Output op::v0::TensorIterator::get_iter_value(const Output& body_value, + int64_t iteration) { auto output_index = get_output_size(); m_output_descriptions.push_back(make_shared( @@ -190,12 +373,12 @@ Output op::TensorIterator::get_iter_value(const Output& body_value, return Output(shared_from_this(), output_index); } -Output op::TensorIterator::get_concatenated_slices(const Output& body_value, - int64_t start, - int64_t stride, - int64_t part_size, - int64_t end, - int64_t axis) +Output op::v0::TensorIterator::get_concatenated_slices(const Output& body_value, + int64_t start, + int64_t stride, + int64_t part_size, + int64_t end, + int64_t axis) { auto output_index = get_output_size(); m_output_descriptions.push_back(make_shared( @@ -204,13 +387,13 @@ Output op::TensorIterator::get_concatenated_slices(const Output& bod return Output(shared_from_this(), output_index); } -NodeVector op::TensorIterator::decompose_op() const +NodeVector op::v0::TensorIterator::decompose_op() const { // Stub return NodeVector{}; } -void op::TensorIterator::revalidate_and_infer_types_for_body_ops() +void op::v0::TensorIterator::revalidate_and_infer_types_for_body_ops() { std::stack, std::vector>> nodes_to_do; std::unordered_set> nodes_done; @@ -224,7 +407,7 @@ void op::TensorIterator::revalidate_and_infer_types_for_body_ops() auto node = nodes_to_do.top(); if (nodes_done.count(node) == 0) { - 
NGRAPH_CHECK(as_type_ptr(node) == nullptr, + NGRAPH_CHECK(as_type_ptr(node) == nullptr, "No nested TensorIterator"); bool can_add = true; size_t arg_count = node->get_input_size(); @@ -254,7 +437,7 @@ void op::TensorIterator::revalidate_and_infer_types_for_body_ops() } } -void op::TensorIterator::validate_and_infer_types() +void op::v0::TensorIterator::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, get_input_size() == m_input_descriptions.size(), @@ -429,9 +612,10 @@ void op::TensorIterator::validate_and_infer_types() } } -std::shared_ptr op::TensorIterator::clone_with_new_inputs(const OutputVector& new_args) const +std::shared_ptr + op::v0::TensorIterator::clone_with_new_inputs(const OutputVector& new_args) const { - auto op = make_shared(new_args); + auto op = make_shared(new_args); op->set_output_size(m_output_descriptions.size()); std::vector<::ngraph::element::Type> types(m_body->get_parameters().size()); @@ -451,7 +635,8 @@ std::shared_ptr op::TensorIterator::clone_with_new_inputs(const OutputVect if (new_shapes[input_description->m_body_parameter_index].is_static()) { if (auto slice_in = ::ngraph::as_type_ptr< - ngraph::op::TensorIterator::SliceInputDescription>(input_description)) + ngraph::op::v0::TensorIterator::SliceInputDescription>( + input_description)) { new_shapes[slice_in->m_body_parameter_index][slice_in->m_axis] = slice_in->m_part_size; @@ -485,3 +670,7 @@ std::shared_ptr op::TensorIterator::clone_with_new_inputs(const OutputVect } return move(op); } + +namespace ngraph +{ +} diff --git a/ngraph/src/ngraph/op/tensor_iterator.hpp b/ngraph/src/ngraph/op/tensor_iterator.hpp index b68410a36f8..5d418c22512 100644 --- a/ngraph/src/ngraph/op/tensor_iterator.hpp +++ b/ngraph/src/ngraph/op/tensor_iterator.hpp @@ -18,6 +18,7 @@ #include +#include "ngraph/factory_adapter.hpp" #include "ngraph/lambda.hpp" #include "ngraph/op/parameter.hpp" #include "ngraph/op/util/fused_op.hpp" @@ -34,6 +35,7 @@ namespace ngraph public: static constexpr NodeTypeInfo type_info{"TensorIterator", 0}; const NodeTypeInfo& get_type_info() const override { return type_info; } + bool visit_attributes(AttributeVisitor& visitor) override; // Forward declarations class SliceInputDescription; class MergedInputDescription; @@ -45,8 +47,9 @@ namespace ngraph class NGRAPH_API BodyLambda : public Lambda { public: - static constexpr DiscreteTypeInfo type_info{"BodyLamdba", 0}; - const DiscreteTypeInfo& get_type_info() const { return type_info; } + using type_info_t = DiscreteTypeInfo; + static constexpr type_info_t type_info{"BodyLamdba", 0}; + const type_info_t& get_type_info() const { return type_info; } BodyLambda(const OutputVector& outputs, const ParameterVector& parameters) : Lambda(outputs, parameters) { @@ -55,6 +58,8 @@ namespace ngraph : Lambda(results, parameters) { } + BodyLambda() = default; + virtual bool visit_attributes(AttributeVisitor& visitor); }; /// \brief Describes a connection between a TensorIterator input and the body. 
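The surrounding hunks apply one pattern to the TensorIterator description classes: each subclass gains a default constructor, so a factory can build an empty instance during deserialization, and a visit_attributes() override that reports every scalar member to the visitor by name, calling the base-class override first. The short standalone sketch below illustrates that shape only; ToyVisitor, InputDesc and SliceInputDesc are hypothetical stand-ins, not the real nGraph AttributeVisitor or TensorIterator API, and the toy visitor models just the serializing direction.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// ToyVisitor records attributes by name; it stands in for an attribute
// visitor that walks an object's members.
struct ToyVisitor
{
    std::map<std::string, long long> values;
    template <typename T>
    void on_attribute(const std::string& name, T& value)
    {
        values[name] = static_cast<long long>(value);
    }
};

// Base description: default-constructible so a factory could create an
// empty instance before the visitor fills it in.
struct InputDesc
{
    InputDesc() = default;
    virtual ~InputDesc() = default;
    uint64_t m_input_index{0};
    uint64_t m_body_parameter_index{0};
    virtual bool visit_attributes(ToyVisitor& visitor)
    {
        visitor.on_attribute("input_index", m_input_index);
        visitor.on_attribute("body_parameter_index", m_body_parameter_index);
        return true;
    }
};

// Derived description adds its own scalars after delegating to the base,
// mirroring the SliceInputDescription::visit_attributes added by this patch.
struct SliceInputDesc : InputDesc
{
    SliceInputDesc() = default;
    int64_t m_start{0};
    int64_t m_stride{0};
    bool visit_attributes(ToyVisitor& visitor) override
    {
        InputDesc::visit_attributes(visitor);
        visitor.on_attribute("start", m_start);
        visitor.on_attribute("stride", m_stride);
        return true;
    }
};

int main()
{
    SliceInputDesc desc;
    desc.m_input_index = 1;
    desc.m_start = 0;
    desc.m_stride = 2;

    ToyVisitor visitor;
    desc.visit_attributes(visitor);
    for (const auto& kv : visitor.values)
    {
        std::cout << kv.first << " = " << kv.second << "\n";
    }
    return 0;
}

In the patch itself the same idea is paired with a FactoryRegistry, so that when reading back a serialized TensorIterator the concrete description type can be constructed from its type_info and then repopulated by visit_attributes().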
@@ -64,15 +69,18 @@ namespace ngraph /// \param input_index Position of the TensorIterator input /// \param body_parameter Body parameter to receive input InputDescription(uint64_t input_index, uint64_t body_parameter_index); + InputDescription() = default; public: + using type_info_t = DiscreteTypeInfo; virtual ~InputDescription() {} virtual std::shared_ptr copy() const = 0; - virtual const DiscreteTypeInfo& get_type_info() const = 0; + virtual const type_info_t& get_type_info() const = 0; + virtual bool visit_attributes(AttributeVisitor& visitor); - uint64_t m_input_index; - uint64_t m_body_parameter_index; + uint64_t m_input_index{0}; + uint64_t m_body_parameter_index{0}; }; /// \brief Describes a body input formed from slices of an input to @@ -80,8 +88,8 @@ namespace ngraph class NGRAPH_API SliceInputDescription : public InputDescription { public: - static constexpr DiscreteTypeInfo type_info{"SliceInputDescription", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } + static constexpr type_info_t type_info{"SliceInputDescription", 0}; + const type_info_t& get_type_info() const override { return type_info; } /// \param input_index Position of the TensorIterator input /// \param body_parameter_index Body parameter position to receive input /// \param start First index for slices @@ -96,13 +104,14 @@ namespace ngraph int64_t part_size, int64_t end, int64_t axis); + SliceInputDescription() = default; std::shared_ptr copy() const override; - - int64_t m_start; - int64_t m_stride; - int64_t m_part_size; - int64_t m_end; - int64_t m_axis; + bool visit_attributes(AttributeVisitor& visitor) override; + int64_t m_start{0}; + int64_t m_stride{0}; + int64_t m_part_size{0}; + int64_t m_end{0}; + int64_t m_axis{0}; }; /// \brief Describes a body input initialized from a TensorIterator input on the @@ -111,8 +120,8 @@ namespace ngraph class NGRAPH_API MergedInputDescription : public InputDescription { public: - static constexpr DiscreteTypeInfo type_info{"MergedInputDescription", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } + static constexpr type_info_t type_info{"MergedInputDescription", 0}; + const type_info_t& get_type_info() const override { return type_info; } /// \param input_index Position of the TensorIterator input supplying a /// value to /// body_parameter @@ -124,18 +133,21 @@ namespace ngraph MergedInputDescription(uint64_t input_index, uint64_t body_parameter_index, uint64_t body_value_index); + MergedInputDescription() = default; std::shared_ptr copy() const override; - - uint64_t m_body_value_index; + bool visit_attributes(AttributeVisitor& visitor) override; + uint64_t m_body_value_index{0}; }; class NGRAPH_API InvariantInputDescription : public InputDescription { public: - static constexpr DiscreteTypeInfo type_info{"InvariantInputDescription", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } + static constexpr type_info_t type_info{"InvariantInputDescription", 0}; + const type_info_t& get_type_info() const override { return type_info; } InvariantInputDescription(uint64_t input_index, uint64_t body_parameter_index); + InvariantInputDescription() = default; std::shared_ptr copy() const override; + bool visit_attributes(AttributeVisitor& visitor) override; }; // Forward declarations @@ -149,22 +161,25 @@ namespace ngraph /// \param body_value_index A body value that produces the output /// \param output_index The TensorIterator output index OutputDescription(uint64_t 
body_value_index, uint64_t output_index); + OutputDescription() = default; public: + using type_info_t = DiscreteTypeInfo; virtual ~OutputDescription() {} virtual std::shared_ptr copy() const = 0; - virtual const DiscreteTypeInfo& get_type_info() const = 0; + virtual bool visit_attributes(AttributeVisitor& visitor); + virtual const type_info_t& get_type_info() const = 0; - uint64_t m_body_value_index; - uint64_t m_output_index; + uint64_t m_body_value_index{0}; + uint64_t m_output_index{0}; }; /// \brief Produces an output by concatenating an output from each iteration class NGRAPH_API ConcatOutputDescription : public OutputDescription { public: - static constexpr DiscreteTypeInfo type_info{"ConcatOutputDescription", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } + static constexpr type_info_t type_info{"ConcatOutputDescription", 0}; + const type_info_t& get_type_info() const override { return type_info; } /// \param body_value_index A body value that produces the output /// \param output_index The TensorIterator output index /// \param start First index for slices @@ -179,22 +194,23 @@ namespace ngraph int64_t part_size, int64_t end, int64_t axis); + ConcatOutputDescription() = default; virtual std::shared_ptr copy() const override; - - int64_t m_start; - int64_t m_stride; - int64_t m_part_size; - int64_t m_end; - int64_t m_axis; + bool visit_attributes(AttributeVisitor& visitor) override; + int64_t m_start{0}; + int64_t m_stride{0}; + int64_t m_part_size{0}; + int64_t m_end{0}; + int64_t m_axis{0}; }; /// \brief Produces an output from a specific iteration class NGRAPH_API BodyOutputDescription : public OutputDescription { public: - static constexpr DiscreteTypeInfo type_info{"BodyOutputDescription", 0}; - const DiscreteTypeInfo& get_type_info() const override { return type_info; } + static constexpr type_info_t type_info{"BodyOutputDescription", 0}; + const type_info_t& get_type_info() const override { return type_info; } /// \param body_value_index A body value that produces the output /// \param output_index The TensorIterator output index /// \param iteration which iteration (typically -1, final) will supply the @@ -202,9 +218,10 @@ namespace ngraph BodyOutputDescription(uint64_t body_value_index, uint64_t output_index, int64_t iteration); + BodyOutputDescription() = default; std::shared_ptr copy() const override; - - int64_t m_iteration; + bool visit_attributes(AttributeVisitor& visitor) override; + int64_t m_iteration{0}; }; /// \brief Indicate that a body parameter comes from slices of a value @@ -316,4 +333,69 @@ namespace ngraph } using v0::TensorIterator; } + template class NGRAPH_API FactoryRegistry; + + template <> + class NGRAPH_API AttributeAdapter> + : public FactoryAttributeAdapter + { + public: + using FactoryAttributeAdapter::FactoryAttributeAdapter; + static constexpr DiscreteTypeInfo type_info{ + "AttributeAdapter>" + ">>", + 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + }; + + template <> + class NGRAPH_API + AttributeAdapter>> + : public VisitorAdapter + { + public: + AttributeAdapter(std::vector>& ref); + + bool visit_attributes(AttributeVisitor& visitor) override; + static constexpr DiscreteTypeInfo type_info{ + "AttributeAdapter>" + ">>", + 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + std::vector>& m_ref; + }; + + template class NGRAPH_API FactoryRegistry; + + template <> + class NGRAPH_API AttributeAdapter> + : public 
FactoryAttributeAdapter + { + public: + using FactoryAttributeAdapter::FactoryAttributeAdapter; + static constexpr DiscreteTypeInfo type_info{ + "AttributeAdapter>" + ">>", + 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + }; + + template <> + class NGRAPH_API + AttributeAdapter>> + : public VisitorAdapter + { + public: + AttributeAdapter(std::vector>& ref); + + bool visit_attributes(AttributeVisitor& visitor) override; + static constexpr DiscreteTypeInfo type_info{ + "AttributeAdapter>" + ">>", + 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + std::vector>& m_ref; + }; } diff --git a/ngraph/src/ngraph/op/util/attr_types.cpp b/ngraph/src/ngraph/op/util/attr_types.cpp index 4d4efdbf848..14d8eec0af8 100644 --- a/ngraph/src/ngraph/op/util/attr_types.cpp +++ b/ngraph/src/ngraph/op/util/attr_types.cpp @@ -15,6 +15,7 @@ //***************************************************************************** #include +#include "ngraph/attribute_visitor.hpp" #include "ngraph/check.hpp" #include "ngraph/enum_names.hpp" #include "ngraph/op/util/attr_types.hpp" @@ -171,5 +172,35 @@ namespace ngraph return allowed_values.at(type); } - NGRAPH_API constexpr DiscreteTypeInfo AttributeAdapter::type_info; + bool AttributeAdapter::visit_attributes(AttributeVisitor& visitor) + { + // Maintain back-compatibility + std::string name = visitor.finish_structure(); + visitor.on_attribute(name, m_ref.m_type); + visitor.start_structure(name); + if (m_ref.m_type == op::AutoBroadcastType::PDPD) + { + visitor.on_attribute("auto_broadcast_axis", m_ref.m_axis); + } + return true; + } + + constexpr DiscreteTypeInfo AttributeAdapter::type_info; + + bool AttributeAdapter::visit_attributes(AttributeVisitor& visitor) + { + // Maintain back-compatibility + std::string name = visitor.finish_structure(); + visitor.on_attribute(name, m_ref.m_type); + visitor.start_structure(name); + if (m_ref.m_type == op::BroadcastType::PDPD) + { + visitor.start_structure(name); + visitor.on_attribute("axis", m_ref.m_axis); + visitor.finish_structure(); + } + return true; + } + + constexpr DiscreteTypeInfo AttributeAdapter::type_info; } diff --git a/ngraph/src/ngraph/op/util/attr_types.hpp b/ngraph/src/ngraph/op/util/attr_types.hpp index 722ae18485b..3f265c7f1a6 100644 --- a/ngraph/src/ngraph/op/util/attr_types.hpp +++ b/ngraph/src/ngraph/op/util/attr_types.hpp @@ -263,7 +263,7 @@ namespace ngraph { enum class TopKSortType { - // Returned values are not sorted + // Returned values are not sorte NONE, // Sort result based on element indices SORT_INDICES, @@ -355,17 +355,19 @@ namespace ngraph } template <> - class NGRAPH_API AttributeAdapter - : public ValueReference, public ValueAccessor + class AttributeAdapter : public VisitorAdapter { public: AttributeAdapter(op::AutoBroadcastSpec& value) - : ValueReference(value) + : m_ref(value) { } + bool visit_attributes(AttributeVisitor& visitor) override; static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + op::AutoBroadcastSpec& m_ref; }; namespace op @@ -402,17 +404,20 @@ namespace ngraph } }; } + template <> - class NGRAPH_API AttributeAdapter - : public ValueReference, public ValueAccessor + class AttributeAdapter : public VisitorAdapter { public: AttributeAdapter(op::BroadcastModeSpec& value) - : ValueReference(value) + : m_ref(value) { } + bool visit_attributes(AttributeVisitor& visitor) override; static constexpr 
DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + op::BroadcastModeSpec& m_ref; }; } diff --git a/ngraph/src/ngraph/op/util/broadcast_base.cpp b/ngraph/src/ngraph/op/util/broadcast_base.cpp index 9addf848385..c3dd6c62d4e 100644 --- a/ngraph/src/ngraph/op/util/broadcast_base.cpp +++ b/ngraph/src/ngraph/op/util/broadcast_base.cpp @@ -138,14 +138,15 @@ void op::util::BroadcastBase::validate_and_infer_types() } PartialShape result_shape{PartialShape::dynamic()}; - + auto input_rank = input_value(0).get_partial_shape().rank(); + auto output_rank = input_value(1).get_partial_shape(); + if (input_rank.is_static() && output_rank.is_static() && output_rank[0].is_static()) + { + result_shape = PartialShape::dynamic(std::max(input_rank.get_length(), output_rank[0].get_length())); + } const auto shape_constant = as_type_ptr(input_value(1).get_node_shared_ptr()); - if (shape_constant) - { - result_shape = shape_constant->get_shape_val(); - } - else if (auto concat = as_type_ptr(input_value(1).get_node_shared_ptr())) + if (auto concat = as_type_ptr(input_value(1).get_node_shared_ptr())) { auto concat_inputs = concat->inputs(); @@ -171,6 +172,10 @@ void op::util::BroadcastBase::validate_and_infer_types() if (m_mode.m_type == BroadcastType::NONE) { + if (shape_constant) + { + result_shape = shape_constant->get_shape_val(); + } // Validate axes_mapping if (get_input_partial_shape(0).is_static() && get_input_partial_shape(1).is_static() && get_input_partial_shape(2).is_static()) diff --git a/ngraph/src/ngraph/opsets/opset3_tbl.hpp b/ngraph/src/ngraph/opsets/opset3_tbl.hpp index 7b96a4c1319..2a18e253781 100644 --- a/ngraph/src/ngraph/opsets/opset3_tbl.hpp +++ b/ngraph/src/ngraph/opsets/opset3_tbl.hpp @@ -21,12 +21,9 @@ NGRAPH_OP(Abs, ngraph::op::v0) NGRAPH_OP(Acos, ngraph::op::v0) -NGRAPH_OP(Acosh, ngraph::op::v3) NGRAPH_OP(Add, ngraph::op::v1) NGRAPH_OP(Asin, ngraph::op::v0) -NGRAPH_OP(Asinh, ngraph::op::v3) NGRAPH_OP(Atan, ngraph::op::v0) -NGRAPH_OP(Atanh, ngraph::op::v3) NGRAPH_OP(AvgPool, ngraph::op::v1) NGRAPH_OP(BatchNormInference, ngraph::op::v0) NGRAPH_OP(BinaryConvolution, ngraph::op::v1) @@ -65,7 +62,7 @@ NGRAPH_OP(GroupConvolution, ngraph::op::v1) NGRAPH_OP(GroupConvolutionBackpropData, ngraph::op::v1) NGRAPH_OP(GRN, ngraph::op::v0) NGRAPH_OP(HardSigmoid, ngraph::op::v0) -NGRAPH_OP(Interpolate, ngraph::op::v3) +NGRAPH_OP(Interpolate, ngraph::op::v0) NGRAPH_OP(Less, ngraph::op::v1) NGRAPH_OP(LessEqual, ngraph::op::v1) NGRAPH_OP(Log, ngraph::op::v0) @@ -129,9 +126,6 @@ NGRAPH_OP(Selu, ngraph::op::v0) // Superseded // NGRAPH_OP(ShapeOf, ngraph::op::v0) -// Moved out of opset2, it was added to opset1 by mistake -// NGRAPH_OP(ShuffleChannels, ngraph::op::v0) - NGRAPH_OP(Sign, ngraph::op::v0) NGRAPH_OP(Sigmoid, ngraph::op::v0) NGRAPH_OP(Sin, ngraph::op::v0) @@ -170,7 +164,6 @@ NGRAPH_OP(RNNCell, ngraph::op::v0) NGRAPH_OP(ROIAlign, ngraph::op::v3) NGRAPH_OP(ScatterElementsUpdate, ngraph::op::v3) NGRAPH_OP(ScatterUpdate, ngraph::op::v3) -NGRAPH_OP(ScatterNDUpdate, ngraph::op::v3) NGRAPH_OP(ShuffleChannels, ngraph::op::v0) NGRAPH_OP(ShapeOf, ngraph::op::v3) NGRAPH_OP(TopK, ngraph::op::v3) diff --git a/ngraph/src/ngraph/partial_shape.cpp b/ngraph/src/ngraph/partial_shape.cpp index 3433a5cccae..b3a049adfc0 100644 --- a/ngraph/src/ngraph/partial_shape.cpp +++ b/ngraph/src/ngraph/partial_shape.cpp @@ -438,4 +438,45 @@ Dimension& PartialShape::operator[](size_t i) return m_dimensions[i]; } +const std::vector& 
ngraph::AttributeAdapter::get() +{ + if (!m_buffer_valid) + { + m_buffer.clear(); + if (m_ref.rank().is_dynamic()) + { + m_buffer.push_back(-2); + } + else + { + for (size_t i = 0; i < m_ref.rank().get_length(); ++i) + { + auto& elt = m_ref[i]; + m_buffer.push_back(elt.is_dynamic() ? -1 : elt.get_length()); + } + } + m_buffer_valid = true; + } + return m_buffer; +} + +void ngraph::AttributeAdapter::set(const std::vector& value) +{ + m_ref = PartialShape(); + if (value.size() == 1 && value[0] == -2) + { + m_ref = PartialShape::dynamic(); + } + else + { + std::vector dims; + for (auto elt : value) + { + dims.push_back(elt == -1 ? Dimension::dynamic() : elt); + } + m_ref = PartialShape(dims); + } + m_buffer_valid = false; +} + NGRAPH_API constexpr DiscreteTypeInfo AttributeAdapter::type_info; diff --git a/ngraph/src/ngraph/partial_shape.hpp b/ngraph/src/ngraph/partial_shape.hpp index f4df92ce5c8..78f34d66c82 100644 --- a/ngraph/src/ngraph/partial_shape.hpp +++ b/ngraph/src/ngraph/partial_shape.hpp @@ -293,16 +293,22 @@ namespace ngraph std::ostream& operator<<(std::ostream& str, const PartialShape& shape); template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public ValueAccessor> { public: AttributeAdapter(PartialShape& value) - : ValueReference(value) + : m_ref(value) { } + const std::vector& get() override; + void set(const std::vector& value) override; static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } + operator PartialShape&() { return m_ref; } + protected: + PartialShape& m_ref; + std::vector m_buffer; + bool m_buffer_valid{false}; }; } diff --git a/ngraph/src/ngraph/pass/algebraic_simplification.cpp b/ngraph/src/ngraph/pass/algebraic_simplification.cpp index 56e247d2c3a..0a24c0b1ee2 100644 --- a/ngraph/src/ngraph/pass/algebraic_simplification.cpp +++ b/ngraph/src/ngraph/pass/algebraic_simplification.cpp @@ -576,7 +576,7 @@ template static shared_ptr multiply_by(element::Type type, size_t multiplier, shared_ptr cnst) { - T sum_cnst = static_cast(cnst->get_vector().at(0) * multiplier); + T sum_cnst = static_cast(cnst->get_data_ptr()[0] * multiplier); return op::Constant::create(type, Shape{}, {sum_cnst}); } @@ -584,7 +584,7 @@ template static shared_ptr pow_by(element::Type type, size_t multiplier, shared_ptr cnst) { T prod = static_cast(1); - T val = cnst->get_vector().at(0); + T val = cnst->get_data_ptr()[0]; for (size_t i = 0; i < multiplier; i++) { prod *= val; diff --git a/ngraph/src/ngraph/pass/constant_folding_arithmetic_reduction.cpp b/ngraph/src/ngraph/pass/constant_folding_arithmetic_reduction.cpp index d5c061fa377..ddf4cee6910 100644 --- a/ngraph/src/ngraph/pass/constant_folding_arithmetic_reduction.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_arithmetic_reduction.cpp @@ -43,63 +43,63 @@ static shared_ptr if (auto max = as_type_ptr(reduction_node)) { - runtime::reference::max(constant->get_vector().data(), + runtime::reference::max(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), max->get_reduction_axes()); } else if (auto reduce_max = as_type_ptr(reduction_node)) { - runtime::reference::max(constant->get_vector().data(), + runtime::reference::max(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduce_max->get_reduction_axes()); } else if (auto min = as_type_ptr(reduction_node)) { - runtime::reference::min(constant->get_vector().data(), + 
runtime::reference::min(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), min->get_reduction_axes()); } else if (auto reduce_min = as_type_ptr(reduction_node)) { - runtime::reference::min(constant->get_vector().data(), + runtime::reference::min(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduce_min->get_reduction_axes()); } else if (auto prod = as_type_ptr(reduction_node)) { - runtime::reference::product(constant->get_vector().data(), + runtime::reference::product(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), prod->get_reduction_axes()); } else if (auto reduce_prod = as_type_ptr(reduction_node)) { - runtime::reference::product(constant->get_vector().data(), + runtime::reference::product(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduce_prod->get_reduction_axes()); } else if (auto sum = as_type_ptr(reduction_node)) { - runtime::reference::sum(constant->get_vector().data(), + runtime::reference::sum(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), sum->get_reduction_axes()); } else if (auto reduce_sum = as_type_ptr(reduction_node)) { - runtime::reference::sum(constant->get_vector().data(), + runtime::reference::sum(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduce_sum->get_reduction_axes()); } else if (auto reduce_mean = as_type_ptr(reduction_node)) { - runtime::reference::mean(constant->get_vector().data(), + runtime::reference::mean(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduce_mean->get_reduction_axes()); diff --git a/ngraph/src/ngraph/pass/constant_folding_convert.cpp b/ngraph/src/ngraph/pass/constant_folding_convert.cpp index ee1597c272f..f10b2f05cda 100644 --- a/ngraph/src/ngraph/pass/constant_folding_convert.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_convert.cpp @@ -33,7 +33,7 @@ shared_ptr fold_constant_convert_helper1(shared_ptr TO* data_ptr = buffer.get_ptr(); runtime::reference::convert( - constant->get_vector().data(), data_ptr, shape_size(out_shape)); + constant->get_data_ptr(), data_ptr, shape_size(out_shape)); return make_shared(output_element_type, out_shape, data_ptr); } diff --git a/ngraph/src/ngraph/pass/constant_folding_dequantize.cpp b/ngraph/src/ngraph/pass/constant_folding_dequantize.cpp index 7746a721926..4d6c75f2d6c 100644 --- a/ngraph/src/ngraph/pass/constant_folding_dequantize.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_dequantize.cpp @@ -31,9 +31,9 @@ shared_ptr fold_constant_dequantize(shared_ptr const runtime::AlignedBuffer buffer(shape_size(out_shape) * sizeof(REAL)); REAL* data_ptr = buffer.get_ptr(); - runtime::reference::dequantize(constant->get_vector().data(), - scale->get_vector().data(), - offset->get_vector().data(), + runtime::reference::dequantize(constant->get_data_ptr(), + scale->get_data_ptr(), + offset->get_data_ptr(), data_ptr, constant->get_shape(), scale->get_shape(), diff --git a/ngraph/src/ngraph/pass/constant_folding_logical_reduction.cpp b/ngraph/src/ngraph/pass/constant_folding_logical_reduction.cpp index 394a7a1e9c2..675262336af 100644 --- a/ngraph/src/ngraph/pass/constant_folding_logical_reduction.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_logical_reduction.cpp @@ -48,7 +48,7 @@ static shared_ptr fold_constant_logical_reduction(shared_ptr(reduction_node)) { - runtime::reference::all(constant->get_vector().data(), + runtime::reference::all(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduction_node->get_shape(), @@ -56,7 +56,7 @@ static 
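// ---------------------------------------------------------------------------
// Editorial note (not part of the patch) on the get_vector() -> get_data_ptr()
// changes in these constant-folding passes: op::Constant::get_vector<T>()
// materializes a std::vector<T> copy of the constant's buffer, so folding
// through ".data()" copied every element before the reference kernel ran,
// while get_data_ptr<T>() returns a pointer straight into the constant's
// aligned storage and lets the kernel read the data in place. A minimal usage
// sketch (function name, element type, and shapes are illustrative only):
#include <memory>
#include <vector>
#include "ngraph/axis_set.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/reference/sum.hpp"
#include "ngraph/shape.hpp"

static std::vector<float> fold_sum_sketch(const std::shared_ptr<ngraph::op::Constant>& constant,
                                          const ngraph::AxisSet& reduction_axes,
                                          const ngraph::Shape& out_shape)
{
    std::vector<float> out(ngraph::shape_size(out_shape));
    ngraph::runtime::reference::sum(constant->get_data_ptr<float>(), // no intermediate copy
                                    out.data(),
                                    constant->get_output_shape(0),
                                    reduction_axes);
    return out;
}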
shared_ptr fold_constant_logical_reduction(shared_ptr(reduction_node)) { - runtime::reference::any(constant->get_vector().data(), + runtime::reference::any(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), reduction_node->get_shape(), @@ -67,7 +67,7 @@ static shared_ptr fold_constant_logical_reduction(shared_ptrget_reduction_axes(); const auto input_shape = reduce_and->get_input_shape(0); - runtime::reference::all(constant->get_vector().data(), + runtime::reference::all(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), get_shape_no_keep_dims(reduction_axes, input_shape), @@ -78,7 +78,7 @@ static shared_ptr fold_constant_logical_reduction(shared_ptrget_reduction_axes(); const auto input_shape = reduce_or->get_input_shape(0); - runtime::reference::any(constant->get_vector().data(), + runtime::reference::any(constant->get_data_ptr(), data_ptr, constant->get_output_shape(0), get_shape_no_keep_dims(reduction_axes, input_shape), diff --git a/ngraph/src/ngraph/pass/constant_folding_one_hot.cpp b/ngraph/src/ngraph/pass/constant_folding_one_hot.cpp index 183b8b9352f..35c57ee3b52 100644 --- a/ngraph/src/ngraph/pass/constant_folding_one_hot.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_one_hot.cpp @@ -31,13 +31,14 @@ shared_ptr fold_constant_one_hot_ref(const shared_ptr out_vec(shape_size(output_shape)); - runtime::reference::one_hot(indices->get_data_ptr(), - out_vec.data(), - indices->get_shape(), - output_shape, - axis, - on_value->get_vector()[0], - off_value->get_vector()[0]); + runtime::reference::one_hot( + indices->get_data_ptr(), + out_vec.data(), + indices->get_shape(), + output_shape, + axis, + on_value->get_data_ptr()[0], + off_value->get_data_ptr()[0]); return make_shared(on_value->get_element_type(), output_shape, out_vec); } diff --git a/ngraph/src/ngraph/pass/constant_folding_quantize.cpp b/ngraph/src/ngraph/pass/constant_folding_quantize.cpp index d35279e1fb0..27117ee53ff 100644 --- a/ngraph/src/ngraph/pass/constant_folding_quantize.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_quantize.cpp @@ -31,9 +31,9 @@ shared_ptr fold_constant_quantize(shared_ptr constan runtime::AlignedBuffer buffer(shape_size(out_shape) * sizeof(QUANT)); QUANT* data_ptr = buffer.get_ptr(); - runtime::reference::quantize(constant->get_vector().data(), - scale->get_vector().data(), - offset->get_vector().data(), + runtime::reference::quantize(constant->get_data_ptr(), + scale->get_data_ptr(), + offset->get_data_ptr(), data_ptr, constant->get_shape(), scale->get_shape(), diff --git a/ngraph/src/ngraph/pass/constant_folding_reverse.cpp b/ngraph/src/ngraph/pass/constant_folding_reverse.cpp index 4fa2d4b0011..1695e4c9be5 100644 --- a/ngraph/src/ngraph/pass/constant_folding_reverse.cpp +++ b/ngraph/src/ngraph/pass/constant_folding_reverse.cpp @@ -30,7 +30,7 @@ static shared_ptr fold_constant_reverse_helper(shared_ptr(); runtime::reference::reverse( - constant->get_vector().data(), data_ptr, out_shape, out_shape, reversed_axes); + constant->get_data_ptr(), data_ptr, out_shape, out_shape, reversed_axes); return make_shared(constant->get_output_element_type(0), out_shape, data_ptr); } diff --git a/ngraph/src/ngraph/runtime/aligned_buffer.cpp b/ngraph/src/ngraph/runtime/aligned_buffer.cpp index 2da52e0e7fd..3ae127d82c2 100644 --- a/ngraph/src/ngraph/runtime/aligned_buffer.cpp +++ b/ngraph/src/ngraph/runtime/aligned_buffer.cpp @@ -76,3 +76,20 @@ runtime::AlignedBuffer& runtime::AlignedBuffer::operator=(AlignedBuffer&& other) } return *this; } + +namespace ngraph +{ + constexpr 
DiscreteTypeInfo AttributeAdapter>::type_info; + + AttributeAdapter>::AttributeAdapter( + shared_ptr& value) + : m_ref(value) + { + } + + void* AttributeAdapter>::get_ptr() + { + return m_ref->get_ptr(); + } + size_t AttributeAdapter>::size() { return m_ref->size(); } +} diff --git a/ngraph/src/ngraph/runtime/aligned_buffer.hpp b/ngraph/src/ngraph/runtime/aligned_buffer.hpp index 654ce3504b3..1766dc13507 100644 --- a/ngraph/src/ngraph/runtime/aligned_buffer.hpp +++ b/ngraph/src/ngraph/runtime/aligned_buffer.hpp @@ -74,3 +74,22 @@ private: char* m_aligned_buffer; size_t m_byte_size; }; + +namespace ngraph +{ + template <> + class NGRAPH_API AttributeAdapter> + : public ValueAccessor + { + public: + AttributeAdapter(std::shared_ptr& value); + void* get_ptr() override; + size_t size() override; + + static constexpr DiscreteTypeInfo type_info{ + "AttributeAdapter>", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + std::shared_ptr& m_ref; + }; +} diff --git a/ngraph/src/ngraph/serializer.cpp b/ngraph/src/ngraph/serializer.cpp index 80cdc74f51a..71a9d491155 100644 --- a/ngraph/src/ngraph/serializer.cpp +++ b/ngraph/src/ngraph/serializer.cpp @@ -112,22 +112,13 @@ public: : m_json(j) { } - - void on_attribute(const std::string& name, std::string& value) override - { - m_json[name] = value; - } - void on_attribute(const std::string& name, bool& value) override { m_json[name] = value; } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - if (auto a = as_type>(&adapter)) - { - m_json[name] = write_element_type(static_cast(*a)); - } - else if (auto a = as_type>(&adapter)) - { - m_json[name] = write_partial_shape(static_cast(*a)); - } + NGRAPH_CHECK(false, "Adapter ", adapter.get_type_info().name, " is not handled"); + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + m_json[name] = adapter.get(); } void on_adapter(const std::string& name, ValueAccessor& adapter) override { @@ -145,6 +136,10 @@ public: { m_json[name] = adapter.get(); } + void on_adapter(const std::string& name, ValueAccessor>& adapter) override + { + m_json[name] = adapter.get(); + } void on_adapter(const std::string& name, ValueAccessor>& adapter) override { m_json[name] = adapter.get(); @@ -198,34 +193,9 @@ public: : m_json(j) { } - void on_attribute(const std::string& name, std::string& value) override - { - if (has_key(m_json, name)) - { - value = m_json.at(name).get(); - } - } - void on_attribute(const std::string& name, bool& value) override - { - if (has_key(m_json, name)) - { - value = m_json.at(name).get(); - } - } void on_adapter(const std::string& name, ValueAccessor& adapter) override { - if (has_key(m_json, name)) - { - if (auto a = as_type>(&adapter)) - { - static_cast(*a) = - read_element_type(m_json.at(name).get()); - } - else if (auto a = as_type>(&adapter)) - { - static_cast(*a) = read_partial_shape(m_json.at(name)); - } - } + NGRAPH_CHECK(false, "Adapter ", adapter.get_type_info().name, " is not handled"); } void on_adapter(const std::string& name, ValueAccessor& adapter) override { @@ -234,6 +204,14 @@ public: adapter.set(m_json.at(name).get()); } } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + if (has_key(m_json, name)) + { + adapter.set(m_json.at(name).get()); + } + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override { if (has_key(m_json, name)) @@ -256,6 +234,13 @@ public: adapter.set(m_json.at(name).get>()); } } + void on_adapter(const 
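// ---------------------------------------------------------------------------
// Editorial note (not part of the patch) on the serializer rework around here:
// with AttributeAdapter<element::Type> exposed as ValueAccessor<std::string>
// and AttributeAdapter<PartialShape> as ValueAccessor<std::vector<int64_t>>,
// the JSON serializer no longer needs to sniff adapter types inside the
// ValueAccessor<void> overload; every attribute reaches a typed on_adapter
// overload. A minimal sketch of that dispatch pattern (hypothetical visitor,
// not the class in serializer.cpp):
#include <cstdint>
#include <string>
#include <vector>
#include "ngraph/attribute_adapter.hpp"
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/check.hpp"
#include "nlohmann/json.hpp"

class JsonWriteSketch : public ngraph::AttributeVisitor
{
public:
    explicit JsonWriteSketch(nlohmann::json& j) : m_json(j) {}
    // Anything that only offers the untyped accessor is now a hard error.
    void on_adapter(const std::string& name, ngraph::ValueAccessor<void>& adapter) override
    {
        NGRAPH_CHECK(false, "Adapter ", adapter.get_type_info().name, " is not handled");
    }
    // element::Type attributes arrive here through their string adapter.
    void on_adapter(const std::string& name, ngraph::ValueAccessor<std::string>& adapter) override
    {
        m_json[name] = adapter.get();
    }
    // PartialShape and other shape-like attributes arrive here as int64_t vectors.
    void on_adapter(const std::string& name,
                    ngraph::ValueAccessor<std::vector<int64_t>>& adapter) override
    {
        m_json[name] = adapter.get();
    }

private:
    nlohmann::json& m_json;
};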
std::string& name, ValueAccessor>& adapter) override + { + if (has_key(m_json, name)) + { + adapter.set(m_json.at(name).get>()); + } + } void on_adapter(const std::string& name, ValueAccessor>& adapter) override { if (has_key(m_json, name)) @@ -3604,7 +3589,7 @@ json JSONSerializer::serialize_node(const Node& n) { const auto tmp = static_cast(&n); node["axis"] = tmp->get_axis(); - node["groups"] = tmp->get_groups(); + node["groups"] = tmp->get_group(); break; } case OP_TYPEID::Sigmoid: { break; diff --git a/ngraph/src/ngraph/shape.cpp b/ngraph/src/ngraph/shape.cpp index 6bed1c50310..4146f6068a5 100644 --- a/ngraph/src/ngraph/shape.cpp +++ b/ngraph/src/ngraph/shape.cpp @@ -69,20 +69,4 @@ ngraph::Shape& ngraph::Shape::operator=(Shape&& v) noexcept return *this; } -const vector& AttributeAdapter::get() -{ - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; -} - -void AttributeAdapter::set(const vector& value) -{ - m_value = copy_from(value); - m_buffer_valid = false; -} - constexpr DiscreteTypeInfo AttributeAdapter::type_info; diff --git a/ngraph/src/ngraph/shape.hpp b/ngraph/src/ngraph/shape.hpp index cb2ca6c8563..c0f23556f3c 100644 --- a/ngraph/src/ngraph/shape.hpp +++ b/ngraph/src/ngraph/shape.hpp @@ -53,18 +53,17 @@ namespace ngraph }; template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor> + class NGRAPH_API AttributeAdapter + : public IndirectVectorValueAccessor> + { public: AttributeAdapter(Shape& value) - : ValueReference(value) + : IndirectVectorValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; /// Number of elements in spanned by a shape diff --git a/ngraph/src/ngraph/strides.cpp b/ngraph/src/ngraph/strides.cpp index 8f5bc542f4c..ef3bd837afa 100644 --- a/ngraph/src/ngraph/strides.cpp +++ b/ngraph/src/ngraph/strides.cpp @@ -65,20 +65,4 @@ ngraph::Strides& ngraph::Strides::operator=(Strides&& v) noexcept return *this; } -const vector& AttributeAdapter::get() -{ - if (!m_buffer_valid) - { - m_buffer = copy_from>(m_value); - m_buffer_valid = true; - } - return m_buffer; -} - -void AttributeAdapter::set(const vector& value) -{ - m_value = copy_from(value); - m_buffer_valid = false; -} - constexpr DiscreteTypeInfo AttributeAdapter::type_info; diff --git a/ngraph/src/ngraph/strides.hpp b/ngraph/src/ngraph/strides.hpp index 6611d5e8230..2a2128595cf 100644 --- a/ngraph/src/ngraph/strides.hpp +++ b/ngraph/src/ngraph/strides.hpp @@ -51,18 +51,17 @@ namespace ngraph }; template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor> + class NGRAPH_API AttributeAdapter + : public IndirectVectorValueAccessor> + { public: AttributeAdapter(Strides& value) - : ValueReference(value) + : IndirectVectorValueAccessor>(value) { } static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } - const std::vector& get() override; - void set(const std::vector& value) override; }; NGRAPH_API diff --git a/ngraph/src/ngraph/type/element_type.cpp b/ngraph/src/ngraph/type/element_type.cpp index a66a294b080..8904b517906 100644 --- a/ngraph/src/ngraph/type/element_type.cpp +++ b/ngraph/src/ngraph/type/element_type.cpp @@ -357,10 +357,10 @@ constexpr DiscreteTypeInfo AttributeAdapter::type_info; const 
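// ---------------------------------------------------------------------------
// Editorial note (not part of the patch) on the Shape/Strides adapter changes
// above: the hand-written get()/set() bodies removed from shape.cpp and
// strides.cpp are now supplied by a shared base (IndirectVectorValueAccessor),
// which converts the referenced container to/from std::vector<int64_t> and
// caches the converted buffer. A rough sketch of that idea, under the
// assumption that the real base performs an element-wise conversion and buffer
// invalidation along these lines:
#include <cstdint>
#include <vector>

template <typename Container, typename VectorType>
class IndirectVectorAccessorSketch
{
public:
    explicit IndirectVectorAccessorSketch(Container& ref) : m_ref(ref) {}
    const VectorType& get()
    {
        if (!m_buffer_valid)
        {
            m_buffer.assign(m_ref.begin(), m_ref.end()); // element-wise conversion
            m_buffer_valid = true;
        }
        return m_buffer;
    }
    void set(const VectorType& value)
    {
        m_ref = Container(value.begin(), value.end());
        m_buffer_valid = false; // force re-conversion on the next get()
    }

protected:
    Container& m_ref;
    VectorType m_buffer;
    bool m_buffer_valid{false};
};
// e.g. IndirectVectorAccessorSketch<ngraph::Shape, std::vector<int64_t>> roughly
// mirrors what AttributeAdapter<Shape> now inherits.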
std::string& AttributeAdapter::get() { - return as_string(static_cast(ValueReference::m_value)); + return as_string(static_cast(m_ref)); } void AttributeAdapter::set(const std::string& value) { - ValueReference::m_value = as_enum(value); + m_ref = as_enum(value); } diff --git a/ngraph/src/ngraph/type/element_type.hpp b/ngraph/src/ngraph/type/element_type.hpp index 3aa0bb78c01..63656292ed8 100644 --- a/ngraph/src/ngraph/type/element_type.hpp +++ b/ngraph/src/ngraph/type/element_type.hpp @@ -201,12 +201,11 @@ namespace ngraph }; template <> - class NGRAPH_API AttributeAdapter : public ValueReference, - public ValueAccessor + class NGRAPH_API AttributeAdapter : public ValueAccessor { public: AttributeAdapter(element::Type& value) - : ValueReference(value) + : m_ref(value) { } @@ -215,6 +214,9 @@ namespace ngraph static constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; const DiscreteTypeInfo& get_type_info() const override { return type_info; } + operator element::Type&() { return m_ref; } + protected: + element::Type& m_ref; }; /// \brief Return the number of bytes in the compile-time representation of the element type. diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index fedadfd6c56..5bf132c8d59 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -254,17 +254,16 @@ endif() set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS NGRAPH_INCLUDES="${PROJECT_SOURCE_DIR}/src/ngraph") -# if (NGRAPH_IE_ENABLE) -# if (ENABLE_MKL_DNN) -# message(STATUS "NGRAPH_TESTS: IE:CPU enabled") -# set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU") -# endif() - -# if (ENABLE_CLDNN) -# message(STATUS "NGRAPH_TESTS: IE:GPU enabled") -# set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU") -# endif() -# endif() +if (NGRAPH_IE_ENABLE) + if (ENABLE_MKL_DNN) + message(STATUS "NGRAPH_TESTS: IE:CPU enabled") + set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU") + endif() + if (ENABLE_CLDNN) + message(STATUS "NGRAPH_TESTS: IE:GPU enabled") + set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU") + endif() +endif() if (NGRAPH_INTERPRETER_ENABLE) list(APPEND SRC diff --git a/ngraph/test/attributes.cpp b/ngraph/test/attributes.cpp index 041c77beccd..709b7b5fcc7 100644 --- a/ngraph/test/attributes.cpp +++ b/ngraph/test/attributes.cpp @@ -21,8 +21,25 @@ #include "ngraph/opsets/opset1.hpp" #include "ngraph/opsets/opset3.hpp" +#include "util/visitor.hpp" + using namespace std; using namespace ngraph; +using ngraph::test::NodeBuilder; +using ngraph::test::ValueMap; + +TEST(attributes, value_map) +{ + ValueMap value_map; + bool a = true; + int8_t b = 2; + value_map.insert("a", a); + value_map.insert("b", b); + bool g_a = value_map.get("a"); + int8_t g_b = value_map.get("b"); + EXPECT_EQ(a, g_a); + EXPECT_EQ(b, g_b); +} enum class TuringModel { @@ -54,7 +71,45 @@ namespace ngraph }; constexpr DiscreteTypeInfo AttributeAdapter::type_info; -} // namespace ngraph + + struct Position + { + float x; + float y; + float z; + bool operator==(const Position& p) const { return x == p.x && y == p.y && z == p.z; } + Position& operator=(const Position& p) + { + x = p.x; + y = p.y; + z = p.z; + return *this; + } + }; + + template <> + class AttributeAdapter : public VisitorAdapter + { + public: + AttributeAdapter(Position& value) + : m_ref(value) + { + } + bool visit_attributes(AttributeVisitor& visitor) override + { + visitor.on_attribute("x", m_ref.x); + visitor.on_attribute("y", m_ref.y); + visitor.on_attribute("z", m_ref.z); + return true; + } + static 
constexpr DiscreteTypeInfo type_info{"AttributeAdapter", 0}; + const DiscreteTypeInfo& get_type_info() const override { return type_info; } + protected: + Position& m_ref; + }; + + constexpr DiscreteTypeInfo AttributeAdapter::type_info; +} // Given a Turing machine program and data, return scalar 1 if the program would // complete, 1 if it would not. @@ -78,6 +133,7 @@ public: int16_t val_int16_t, int32_t val_int32_t, int64_t val_int64_t, + size_t val_size_t, const std::vector& vec_string, const std::vector& vec_float, const std::vector& vec_double, @@ -88,7 +144,13 @@ public: const std::vector& vec_int8_t, const std::vector& vec_int16_t, const std::vector& vec_int32_t, - const std::vector& vec_int64_t) + const std::vector& vec_int64_t, + const std::vector& vec_size_t, + const Position& position, + const shared_ptr& node, + const NodeVector& node_vector, + const ParameterVector& parameter_vector, + const ResultVector& result_vector) : Op({program, data}) , m_turing_model(turing_model) , m_element_type(element_type) @@ -105,6 +167,7 @@ public: , m_val_int16_t(val_int16_t) , m_val_int32_t(val_int32_t) , m_val_int64_t(val_int64_t) + , m_val_size_t(val_size_t) , m_vec_string(vec_string) , m_vec_float(vec_float) , m_vec_double(vec_double) @@ -116,6 +179,12 @@ public: , m_vec_int16_t(vec_int16_t) , m_vec_int32_t(vec_int32_t) , m_vec_int64_t(vec_int64_t) + , m_vec_size_t(vec_size_t) + , m_position(position) + , m_node(node) + , m_node_vector(node_vector) + , m_parameter_vector(parameter_vector) + , m_result_vector(result_vector) { } @@ -138,6 +207,7 @@ public: int64_t get_val_int16_t() const { return m_val_int16_t; } int64_t get_val_int32_t() const { return m_val_int32_t; } int64_t get_val_int64_t() const { return m_val_int64_t; } + size_t get_val_size_t() const { return m_val_size_t; } const vector& get_vec_uint8_t() const { return m_vec_uint8_t; } const vector& get_vec_uint16_t() const { return m_vec_uint16_t; } const vector& get_vec_uint32_t() const { return m_vec_uint32_t; } @@ -149,6 +219,12 @@ public: const vector& get_vec_string() const { return m_vec_string; } const vector& get_vec_float() const { return m_vec_float; } const vector& get_vec_double() const { return m_vec_double; } + const vector& get_vec_size_t() const { return m_vec_size_t; } + const Position& get_position() const { return m_position; } + const shared_ptr& get_node() const { return m_node; } + const NodeVector& get_node_vector() const { return m_node_vector; } + const ParameterVector& get_parameter_vector() const { return m_parameter_vector; } + const ResultVector& get_result_vector() const { return m_result_vector; } shared_ptr clone_with_new_inputs(const OutputVector& args) const override { return make_shared(args[0], @@ -168,6 +244,7 @@ public: m_val_int16_t, m_val_int32_t, m_val_int64_t, + m_val_size_t, m_vec_string, m_vec_float, m_vec_double, @@ -178,7 +255,13 @@ public: m_vec_int8_t, m_vec_int16_t, m_vec_int32_t, - m_vec_int64_t); + m_vec_int64_t, + m_vec_size_t, + m_position, + m_node, + m_node_vector, + m_parameter_vector, + m_result_vector); } void validate_and_infer_types() override { set_output_type(0, element::i64, {}); } @@ -199,6 +282,7 @@ public: visitor.on_attribute("val_int16_t", m_val_int16_t); visitor.on_attribute("val_int32_t", m_val_int32_t); visitor.on_attribute("val_int64_t", m_val_int64_t); + visitor.on_attribute("val_size_t", m_val_size_t); visitor.on_attribute("vec_string", m_vec_string); visitor.on_attribute("vec_float", m_vec_float); visitor.on_attribute("vec_double", m_vec_double); @@ -210,6 
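// ---------------------------------------------------------------------------
// Editorial note (not part of the patch): the growing Oracle op above is the
// test-side exerciser for the visitor API. The contract it relies on is small:
// any op that lists its members through AttributeVisitor::on_attribute() can be
// saved and rebuilt by a generic visitor such as the test NodeBuilder. A
// minimal sketch of that contract on a hypothetical op (not part of nGraph):
#include <memory>
#include "ngraph/op/op.hpp"

class TinySketchOp : public ngraph::op::Op
{
public:
    static constexpr ngraph::NodeTypeInfo type_info{"TinySketchOp", 0};
    const ngraph::NodeTypeInfo& get_type_info() const override { return type_info; }

    TinySketchOp() = default; // allows a factory/builder to default-construct it
    TinySketchOp(const ngraph::Output<ngraph::Node>& arg, int64_t axis, bool keep_dims)
        : Op({arg}), m_axis(axis), m_keep_dims(keep_dims)
    {
    }

    bool visit_attributes(ngraph::AttributeVisitor& visitor) override
    {
        visitor.on_attribute("axis", m_axis);
        visitor.on_attribute("keep_dims", m_keep_dims);
        return true;
    }

    void validate_and_infer_types() override
    {
        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
    }

    std::shared_ptr<ngraph::Node>
        clone_with_new_inputs(const ngraph::OutputVector& new_args) const override
    {
        return std::make_shared<TinySketchOp>(new_args.at(0), m_axis, m_keep_dims);
    }

private:
    int64_t m_axis{0};
    bool m_keep_dims{false};
};

constexpr ngraph::NodeTypeInfo TinySketchOp::type_info;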
+294,12 @@ public: visitor.on_attribute("vec_int16_t", m_vec_int16_t); visitor.on_attribute("vec_int32_t", m_vec_int32_t); visitor.on_attribute("vec_int64_t", m_vec_int64_t); + visitor.on_attribute("vec_size_t", m_vec_size_t); + visitor.on_attribute("position", m_position); + visitor.on_attribute("node", m_node); + visitor.on_attribute("node_vector", m_node_vector); + visitor.on_attribute("parameter_vector", m_parameter_vector); + visitor.on_attribute("result_vector", m_result_vector); return true; } @@ -229,6 +319,7 @@ protected: int16_t m_val_int16_t; int32_t m_val_int32_t; int64_t m_val_int64_t; + size_t m_val_size_t{23}; vector m_vec_string; vector m_vec_float; vector m_vec_double; @@ -240,290 +331,22 @@ protected: vector m_vec_int16_t; vector m_vec_int32_t; vector m_vec_int64_t; + vector m_vec_size_t; + Position m_position; + shared_ptr m_node; + NodeVector m_node_vector; + ParameterVector m_parameter_vector; + ResultVector m_result_vector; }; constexpr NodeTypeInfo Oracle::type_info; -class NodeSaver : public AttributeVisitor -{ -public: - NodeSaver(shared_ptr node) - : m_node_type_info(node->get_type_info()) - { - node->visit_attributes(*this); - } - const NodeTypeInfo& get_node_type_info() { return m_node_type_info; } - string& get_string(const string& name) { return m_strings.at(name); } - bool get_bool(const string& name) { return m_bools.at(name); } - float get_float(const string& name) { return m_doubles.at(name); } - double get_double(const string& name) { return m_doubles.at(name); } - int64_t get_signed(const string& name) { return m_signeds.at(name); } - uint64_t get_unsigned(const string& name) { return m_unsigneds.at(name); } - vector& get_float_vector(const string& name) { return m_float_vectors.at(name); } - vector& get_double_vector(const string& name) { return m_double_vectors.at(name); } - vector& get_int8_t_vector(const string& name) { return m_int8_t_vectors.at(name); } - vector& get_int16_t_vector(const string& name) { return m_int16_t_vectors.at(name); } - vector& get_int32_t_vector(const string& name) { return m_int32_t_vectors.at(name); } - vector& get_int64_t_vector(const string& name) { return m_int64_t_vectors.at(name); } - vector& get_uint8_t_vector(const string& name) { return m_uint8_t_vectors.at(name); } - vector& get_uint16_t_vector(const string& name) - { - return m_uint16_t_vectors.at(name); - } - vector& get_uint32_t_vector(const string& name) - { - return m_uint32_t_vectors.at(name); - } - vector& get_uint64_t_vector(const string& name) - { - return m_uint64_t_vectors.at(name); - } - - vector& get_string_vector(const string& name) { return m_string_vectors.at(name); } - HostTensorPtr get_host_tensor(const string& name) { return m_host_tensors.at(name); } - void set_string(const string& name, const string& value) { m_strings[name] = value; } - void set_bool(const string& name, bool value) { m_bools[name] = value; } - void set_double(const string& name, double value) { m_doubles[name] = value; } - void set_signed(const string& name, int64_t value) { m_signeds[name] = value; } - void set_float_vector(const string& name, const vector& value) - { - m_float_vectors[name] = value; - } - void set_double_vector(const string& name, const vector& value) - { - m_double_vectors[name] = value; - } - void set_int8_t_vector(const string& name, const vector& value) - { - m_int8_t_vectors[name] = value; - } - void set_int16_t_vector(const string& name, const vector& value) - { - m_int16_t_vectors[name] = value; - } - void set_int32_t_vector(const string& name, const 
vector& value) - { - m_int32_t_vectors[name] = value; - } - void set_int64_t_vector(const string& name, const vector& value) - { - m_int64_t_vectors[name] = value; - } - void set_uint8_t_vector(const string& name, const vector& value) - { - m_uint8_t_vectors[name] = value; - } - void set_uint16_t_vector(const string& name, const vector& value) - { - m_uint16_t_vectors[name] = value; - } - void set_uint32_t_vector(const string& name, const vector& value) - { - m_uint32_t_vectors[name] = value; - } - void set_uint64_t_vector(const string& name, const vector& value) - { - m_uint64_t_vectors[name] = value; - } - void set_string_vector(const string& name, const vector& value) - { - m_string_vectors[name] = value; - } - void set_host_tensor(const string& name, const HostTensorPtr& value) - { - m_host_tensors[name] = value; - } - - void on_attribute(const string& name, string& value) override { set_string(name, value); }; - void on_attribute(const string& name, bool& value) override { set_bool(name, value); } - void on_adapter(const string& name, ValueAccessor& adapter) override - { - NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be marshalled"); - } - // The remaining adapter methods fall back on the void adapter if not implemented - void on_adapter(const string& name, ValueAccessor& adapter) override - { - set_string(name, adapter.get()); - }; - void on_adapter(const string& name, ValueAccessor& adapter) override - { - set_signed(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor& adapter) override - { - set_double(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_string_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_float_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_double_vector(name, adapter.get()); - } - - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_int8_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_int16_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_int32_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_int64_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_uint8_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_uint16_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_uint32_t_vector(name, adapter.get()); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - set_uint64_t_vector(name, adapter.get()); - } - void on_attribute(const std::string& name, void* constant_data, size_t size) override - { - HostTensorPtr data = make_shared(element::u8, Shape{size}); - data->write(constant_data, size); - set_host_tensor(name, data); - } - -protected: - NodeTypeInfo m_node_type_info; - map m_strings; - map m_bools; - map m_doubles; - map m_signeds; - map m_unsigneds; - map> m_int8_t_vectors; - map> m_int16_t_vectors; - map> m_int32_t_vectors; - map> m_int64_t_vectors; - map> m_uint8_t_vectors; - map> m_uint16_t_vectors; - map> m_uint32_t_vectors; - map> m_uint64_t_vectors; - map> m_float_vectors; - map> 
m_double_vectors; - map> m_string_vectors; - map m_host_tensors; -}; - -class NodeBuilder : public AttributeVisitor -{ -public: - NodeBuilder(const shared_ptr& node) - : m_values(node) - { - } - - // Does not validate, since inputs aren't set - shared_ptr create() - { - shared_ptr node(FactoryRegistry::get().create(m_values.get_node_type_info())); - node->visit_attributes(*this); - return node; - } - - void on_attribute(const string& name, string& value) override - { - value = m_values.get_string(name); - }; - void on_attribute(const string& name, bool& value) override { value = m_values.get_bool(name); } - void on_adapter(const string& name, ValueAccessor& adapter) override - { - NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be unmarshalled"); - } - // The remaining adapter methods fall back on the void adapter if not implemented - void on_adapter(const string& name, ValueAccessor& adapter) override - { - adapter.set(m_values.get_string(name)); - }; - void on_adapter(const string& name, ValueAccessor& adapter) override - { - adapter.set(m_values.get_signed(name)); - } - void on_adapter(const string& name, ValueAccessor& adapter) override - { - adapter.set(m_values.get_double(name)); - } - - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_int8_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_int16_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_int32_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_int64_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_uint8_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_uint16_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_uint32_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_uint64_t_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_string_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_float_vector(name)); - } - void on_adapter(const string& name, ValueAccessor>& adapter) override - { - adapter.set(m_values.get_double_vector(name)); - } - void on_attribute(const std::string& name, void* constant_data, size_t size) override - { - HostTensorPtr data = m_values.get_host_tensor(name); - data->read(constant_data, size); - } - -protected: - NodeSaver m_values; -}; - TEST(attributes, user_op) { FactoryRegistry::get().register_factory(); auto program = make_shared(element::i32, Shape{200}); auto data = make_shared(element::i32, Shape{200}); + auto result = make_shared(data); auto oracle = make_shared(program, data, TuringModel::XL1200, @@ -541,6 +364,7 @@ TEST(attributes, user_op) -2, -4, -8, + 34, vector{"Hello", "World"}, vector{1.0f, 2.0f}, vector{1.0, 2.0}, @@ -551,8 +375,25 @@ TEST(attributes, user_op) vector{1, 2, 4, 8}, vector{1, 2, 4, 8}, vector{1, 2, 4, 8}, - vector{1, 2, 4, 8}); - NodeBuilder builder(oracle); + vector{1, 2, 4, 8}, + vector{1, 3, 8, 4, 2}, + Position{1.3f, 5.1f, 2.3f}, + data, + NodeVector{program, result, 
data}, + ParameterVector{data, data, program}, + ResultVector{result}); + NodeBuilder builder; + AttributeVisitor& saver = builder.get_node_saver(); + AttributeVisitor& loader = builder.get_node_loader(); + loader.register_node(program, "program"); + ASSERT_EQ(loader.get_registered_node("program"), program); + ASSERT_EQ(loader.get_registered_node_id(program), "program"); + loader.register_node(data, "data"); + loader.register_node(result, "result"); + saver.register_node(program, "program"); + saver.register_node(data, "data"); + saver.register_node(result, "result"); + builder.save_node(oracle); auto g_oracle = as_type_ptr(builder.create()); EXPECT_EQ(g_oracle->get_turing_model(), oracle->get_turing_model()); @@ -570,6 +411,7 @@ TEST(attributes, user_op) EXPECT_EQ(g_oracle->get_val_int16_t(), oracle->get_val_int16_t()); EXPECT_EQ(g_oracle->get_val_int32_t(), oracle->get_val_int32_t()); EXPECT_EQ(g_oracle->get_val_int64_t(), oracle->get_val_int64_t()); + EXPECT_EQ(g_oracle->get_val_size_t(), oracle->get_val_size_t()); EXPECT_EQ(g_oracle->get_vec_uint8_t(), oracle->get_vec_uint8_t()); EXPECT_EQ(g_oracle->get_vec_uint16_t(), oracle->get_vec_uint16_t()); EXPECT_EQ(g_oracle->get_vec_uint32_t(), oracle->get_vec_uint32_t()); @@ -581,6 +423,12 @@ TEST(attributes, user_op) EXPECT_EQ(g_oracle->get_vec_string(), oracle->get_vec_string()); EXPECT_EQ(g_oracle->get_vec_float(), oracle->get_vec_float()); EXPECT_EQ(g_oracle->get_vec_double(), oracle->get_vec_double()); + EXPECT_EQ(g_oracle->get_vec_size_t(), oracle->get_vec_size_t()); + EXPECT_EQ(g_oracle->get_position(), oracle->get_position()); + EXPECT_EQ(g_oracle->get_node(), oracle->get_node()); + EXPECT_EQ(g_oracle->get_node_vector(), oracle->get_node_vector()); + EXPECT_EQ(g_oracle->get_parameter_vector(), oracle->get_parameter_vector()); + EXPECT_EQ(g_oracle->get_result_vector(), oracle->get_result_vector()); } TEST(attributes, matmul_op) @@ -600,6 +448,43 @@ TEST(attributes, matmul_op) EXPECT_EQ(g_matmul->get_transpose_b(), matmul->get_transpose_b()); } +TEST(attributes, partial_shape) +{ + NodeBuilder builder; + AttributeVisitor& loader = builder.get_node_loader(); + AttributeVisitor& saver = builder.get_node_saver(); + + PartialShape dyn = PartialShape::dynamic(); + saver.on_attribute("dyn", dyn); + PartialShape g_dyn; + loader.on_attribute("dyn", g_dyn); + EXPECT_EQ(dyn, g_dyn); + + PartialShape scalar{}; + saver.on_attribute("scalar", scalar); + PartialShape g_scalar; + loader.on_attribute("scalar", g_scalar); + EXPECT_EQ(scalar, g_scalar); + + PartialShape dyn_vector{Dimension::dynamic()}; + saver.on_attribute("dyn_vector", dyn_vector); + PartialShape g_dyn_vector; + loader.on_attribute("dyn_vector", g_dyn_vector); + EXPECT_EQ(dyn_vector, g_dyn_vector); + + PartialShape stat_vector{7}; + saver.on_attribute("stat_vector", stat_vector); + PartialShape g_stat_vector; + loader.on_attribute("stat_vector", g_stat_vector); + EXPECT_EQ(stat_vector, g_stat_vector); + + PartialShape general{7, Dimension::dynamic(), 2, Dimension::dynamic(), 4}; + saver.on_attribute("general", general); + PartialShape g_general; + loader.on_attribute("general", g_general); + EXPECT_EQ(general, g_general); +} + TEST(attributes, max_pool_op) { FactoryRegistry::get().register_factory(); @@ -1275,7 +1160,7 @@ TEST(attributes, shuffle_channels_op) auto g_shuffle_channels = as_type_ptr(builder.create()); EXPECT_EQ(g_shuffle_channels->get_axis(), shuffle_channels->get_axis()); - EXPECT_EQ(g_shuffle_channels->get_groups(), shuffle_channels->get_groups()); + 
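// ---------------------------------------------------------------------------
// Editorial note (not part of the patch) on the node-valued attributes in the
// user_op test above: attributes that hold Node pointers (node, node_vector,
// parameter_vector, result_vector) are marshalled as string ids, so both the
// saving and the loading visitor must know the id of every node that may be
// referenced. A condensed usage sketch of that registration step, assuming the
// test NodeBuilder API shown above (get_node_saver/get_node_loader/register_node):
#include <memory>
#include "ngraph/attribute_visitor.hpp"
#include "util/visitor.hpp"

static void register_graph_nodes_sketch(ngraph::test::NodeBuilder& builder,
                                        const std::shared_ptr<ngraph::Node>& program,
                                        const std::shared_ptr<ngraph::Node>& data,
                                        const std::shared_ptr<ngraph::Node>& result)
{
    ngraph::AttributeVisitor& saver = builder.get_node_saver();
    ngraph::AttributeVisitor& loader = builder.get_node_loader();
    for (ngraph::AttributeVisitor* visitor : {&saver, &loader})
    {
        visitor->register_node(program, "program");
        visitor->register_node(data, "data");
        visitor->register_node(result, "result");
    }
}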
EXPECT_EQ(g_shuffle_channels->get_group(), shuffle_channels->get_group()); } TEST(attributes, softmax_op) diff --git a/ngraph/test/constant.cpp b/ngraph/test/constant.cpp index d46765c94b5..b11934ff342 100644 --- a/ngraph/test/constant.cpp +++ b/ngraph/test/constant.cpp @@ -1017,8 +1017,8 @@ TEST(constant, shared_data) Shape shape{100, 200}; auto c1 = make_shared(element::f16, shape, vector{123}); auto c2 = static_pointer_cast(c1->clone_with_new_inputs({})); - const float* p1 = c1->get_data_ptr(); - const float* p2 = c2->get_data_ptr(); + const int16_t* p1 = c1->get_data_ptr(); + const int16_t* p2 = c2->get_data_ptr(); EXPECT_EQ(p1, p2); } diff --git a/ngraph/test/models/onnx/flatten.prototxt b/ngraph/test/models/onnx/flatten.prototxt index b70b6caa119..53e7835dba9 100644 --- a/ngraph/test/models/onnx/flatten.prototxt +++ b/ngraph/test/models/onnx/flatten.prototxt @@ -21,20 +21,6 @@ graph { type { tensor_type { elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - dim { - dim_value: 2 - } - dim { - dim_value: 2 - } - } } } } @@ -43,17 +29,6 @@ graph { type { tensor_type { elem_type: 1 - shape { - dim { - dim_value: 1 - } - dim { - dim_value: 2 - } - dim { - dim_value: 4 - } - } } } } diff --git a/ngraph/test/onnx/onnx_import.in.cpp b/ngraph/test/onnx/onnx_import.in.cpp index b3f2bc00d23..84258db8c13 100644 --- a/ngraph/test/onnx/onnx_import.in.cpp +++ b/ngraph/test/onnx/onnx_import.in.cpp @@ -792,30 +792,6 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) test_case.run(4); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_2) -{ - auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_2.prototxt")); - - auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}"); - test_case.add_input(SOFTMAX_INPUT); - - test_case.add_expected_output( - {0.80619486, 0.03075257, 0.1161086, 0.027393, 0.01955098, 0.07012682, 0.22670066, - 0.18689779, 0.4614171, 0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265, - 0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552, - - 0.30399806, 0.1076406, 0.03371745, 0.0950595, 0.4595844, 0.13369873, 0.04866969, - 0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588, - 0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432, - - 0.0402528, 0.31156222, 0.23747503, 0.1543129, 0.25639705, 0.10627912, 0.00436928, - 0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175, 0.17468555, 0.34994439, - 0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905}); - - test_case.run(4); -} - NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_1D) { ASSERT_THROW(onnx_import::import_onnx_model( @@ -1807,42 +1783,6 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_erf_int32) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_hardmax) -{ - auto hardmax_fn = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/hardmax.prototxt")); - - auto test_case = ngraph::test::NgraphTestCase(hardmax_fn, "${BACKEND_NAME}"); - test_case.add_input( - {-2.02458119f, 0.00126542f, -0.58045743f, -0.75186814f, 0.9406899f, - -0.513188f, 0.85887463f, 1.61444086f, 0.23801147f, -0.26816885f, - 0.6597208f, 1.43889519f, 0.28798895f, 1.44769952f, -1.99466756f, - 0.41386644f, 0.69389555f, 1.46118255f, -1.67628606f, 1.49697552f, - - 0.06337166f, -1.15740783f, 0.8792142f, -0.95352717f, -1.87895792f, - -0.74066102f, -0.27131459f, 0.2219685f, 0.31831001f, 0.52495901f, - 0.60283089f, 0.60397976f, 
0.92401468f, 0.29565101f, -1.14443776f, - -1.07399045f, -0.92266259f, 0.24017731f, -0.30105675f, 1.18513269f, - - 0.55494542f, 1.12119279f, -0.43156474f, 0.15101668f, -1.460439f, - 0.96375129f, 1.10411785f, -0.30272771f, -0.48855848f, 0.12103213f, - -0.71388492f, 1.38398178f, 0.21924434f, 0.93105052f, -0.21074303f, - 0.48213503f, -1.37810638f, 8.99060285f, 0.54794592f, -0.46820172f}); - - // values for hardmax with axis==2 - test_case.add_expected_output( - Shape{3, 4, 5}, {0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, - - 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, - 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, - - 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f}); - - test_case.run(); -} - NGRAPH_TEST(${BACKEND_NAME}, onnx_model_shrink_float) { const auto shrink_fn = onnx_import::import_onnx_model( diff --git a/ngraph/test/onnx/onnx_import_dyn_shapes.in.cpp b/ngraph/test/onnx/onnx_import_dyn_shapes.in.cpp index c584ae71891..bb2a3bb339e 100644 --- a/ngraph/test/onnx/onnx_import_dyn_shapes.in.cpp +++ b/ngraph/test/onnx/onnx_import_dyn_shapes.in.cpp @@ -816,6 +816,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_neg_axis) } } +NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_flatten) +{ + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/flatten.prototxt")); + + std::vector data{1, 2, 3, 4, 5, 6, 7, 8}; + auto test_case = + ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}", BackendMode::DYNAMIC); + test_case.add_input(Shape{1, 2, 2, 2}, data); + test_case.add_expected_output(Shape{1, 8}, data); + + test_case.run(); +} + NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_global_lp_dynamic_hw) { auto function = onnx_import::import_onnx_model( @@ -1058,3 +1072,78 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_default_axes) test_case.add_input({2, 2, 2}); test_case.add_expected_output(Shape{1, 1, 1}, {9}); } + +NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_hardmax) +{ + auto hardmax_fn = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/hardmax.prototxt")); + + auto test_case = + ngraph::test::NgraphTestCase(hardmax_fn, "${BACKEND_NAME}", BackendMode::DYNAMIC); + test_case.add_input( + {-2.02458119f, 0.00126542f, -0.58045743f, -0.75186814f, 0.9406899f, + -0.513188f, 0.85887463f, 1.61444086f, 0.23801147f, -0.26816885f, + 0.6597208f, 1.43889519f, 0.28798895f, 1.44769952f, -1.99466756f, + 0.41386644f, 0.69389555f, 1.46118255f, -1.67628606f, 1.49697552f, + + 0.06337166f, -1.15740783f, 0.8792142f, -0.95352717f, -1.87895792f, + -0.74066102f, -0.27131459f, 0.2219685f, 0.31831001f, 0.52495901f, + 0.60283089f, 0.60397976f, 0.92401468f, 0.29565101f, -1.14443776f, + -1.07399045f, -0.92266259f, 0.24017731f, -0.30105675f, 1.18513269f, + + 0.55494542f, 1.12119279f, -0.43156474f, 0.15101668f, -1.460439f, + 0.96375129f, 1.10411785f, -0.30272771f, -0.48855848f, 0.12103213f, + -0.71388492f, 1.38398178f, 0.21924434f, 0.93105052f, -0.21074303f, + 0.48213503f, -1.37810638f, 8.99060285f, 0.54794592f, -0.46820172f}); + + // values for hardmax with axis==2 + test_case.add_expected_output( + Shape{3, 4, 5}, {0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, + + 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, + 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 
1.0f, + + 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f}); + + test_case.run(); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_softmax_axis_2) +{ + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_2.prototxt")); + + const std::vector input = { + 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, 0.21004745, 1.38337255, + 1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777, + -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, + + 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018, + 0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 0.23727845, + -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, + + -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014, + -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026, + -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + + auto test_case = + ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}", BackendMode::DYNAMIC); + test_case.add_input(input); + + test_case.add_expected_output( + {0.80619486, 0.03075257, 0.1161086, 0.027393, 0.01955098, 0.07012682, 0.22670066, + 0.18689779, 0.4614171, 0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265, + 0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552, + + 0.30399806, 0.1076406, 0.03371745, 0.0950595, 0.4595844, 0.13369873, 0.04866969, + 0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588, + 0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432, + + 0.0402528, 0.31156222, 0.23747503, 0.1543129, 0.25639705, 0.10627912, 0.00436928, + 0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175, 0.17468555, 0.34994439, + 0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905}); + + test_case.run(4); +} diff --git a/ngraph/test/onnx/onnx_import_reshape.in.cpp b/ngraph/test/onnx/onnx_import_reshape.in.cpp index f641e15f179..d0aba31fe4f 100644 --- a/ngraph/test/onnx/onnx_import_reshape.in.cpp +++ b/ngraph/test/onnx/onnx_import_reshape.in.cpp @@ -424,20 +424,6 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_concat_negative_axis) test_case.run(); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_flatten) -{ - auto function = onnx_import::import_onnx_model( - file_util::path_join(SERIALIZED_ZOO, "onnx/flatten.prototxt")); - - auto input = test::NDArray({{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}}).get_vector(); - auto expected_output = test::NDArray({{{1, 2, 3, 4}, {5, 6, 7, 8}}}).get_vector(); - - auto test_case = ngraph::test::NgraphTestCase(function, "${BACKEND_NAME}"); - test_case.add_input(input); - test_case.add_expected_output(expected_output); - test_case.run(); -} - NGRAPH_TEST(${BACKEND_NAME}, onnx_model_split_equal_parts_default) { auto function = onnx_import::import_onnx_model( diff --git a/ngraph/test/runtime/backend_manager.hpp b/ngraph/test/runtime/backend_manager.hpp index 7a41ef08515..a958c28f9e3 100644 --- a/ngraph/test/runtime/backend_manager.hpp +++ b/ngraph/test/runtime/backend_manager.hpp @@ -55,7 +55,7 @@ public: /// \param backend_constructor A BackendConstructor which will be called to //// construct an instance of the registered backend. 
static BACKEND_API void register_backend(const std::string& name, - BackendConstructor backend_constructor); + BackendConstructor backend_constructor); /// \brief Query the list of registered devices /// \returns A vector of all registered devices. diff --git a/ngraph/test/runtime/ie/unit_test.manifest b/ngraph/test/runtime/ie/unit_test.manifest index 8227b742542..a18f5b22449 100644 --- a/ngraph/test/runtime/ie/unit_test.manifest +++ b/ngraph/test/runtime/ie/unit_test.manifest @@ -131,6 +131,10 @@ expand_5_dyn_shape expand_6_dyn_shape expand_uint16_dyn_shape minimum_int64 +floor_int64 +ceiling_int64 +matmul_2x2x3_2x3x1_int64 +matmul_2x2x3_2x1x3_transpose_int64 # TopK Incorrect input data/index values precision onnx_model_argmax_int32 @@ -296,7 +300,7 @@ dyn_slice_106 dyn_slice_109 dyn_slice_114 reduce_sum_keep_dynamic -reduce_sum_keep_stable_simple_double +reduce_sum_keep_stable_simple_double reduce_sum_keep_stable_acc_double reduce_sum_keep_stable_acc reduce_sum_keep_3d_eliminate_zero_dim @@ -386,6 +390,9 @@ dyn_slice_180 tile_3d_small_data_rank tile_3d_few_repeats +# Error of validate layer: MatMul_683292 with type: Gemm. Gemm input shapes must have at least 2 dimensions +matmul_2_2 + # Result mismatch sum_large_1d_to_scalar sum_stable_acc @@ -420,6 +427,77 @@ max_to_scalar_int8 gather_4d_indices_no_axis_uint8 tensor_constant_with_op constant_equality_bool +reduce_product_matrix_rows +reduce_product_3d_to_matrix_most_sig +reduce_product_3d_to_matrix_least_sig +reduce_product_keep_matrix_columns +reduce_product_keep_matrix_rows +reduce_product_keep_3d_to_matrix_most_sig +reduce_product_keep_3d_to_matrix_least_sig +reduce_product_matrix_columns_dynamic +reduce_product_matrix_rows_dynamic +reduce_product_keep_matrix_columns_dynamic +reduce_product_keep_matrix_rows_dynamic +reduce_min_matrix_columns +reduce_min_matrix_rows +reduce_min_matrix_rows_int32 +reduce_min_3d_to_matrix_most_sig +reduce_min_3d_to_matrix_least_sig +reduce_min_keep_matrix_columns +reduce_min_keep_matrix_rows +reduce_min_keep_matrix_rows_int32 +reduce_min_keep_3d_to_matrix_most_sig +reduce_min_keep_3d_to_matrix_least_sig +reduce_min_matrix_columns_dynamic +reduce_min_matrix_rows_dynamic +reduce_min_keep_matrix_columns_dynamic +reduce_min_keep_matrix_rows_dynamic + +# zero dimension / result mismatch +reduce_product_matrix_rows_zero +reduce_product_matrix_cols_zero +reduce_product_vector_zero +reduce_product_matrix_to_scalar_zero_by_zero +reduce_product_3d_eliminate_zero_dim +reduce_product_to_scalar_int8 +reduce_product_keep_matrix_rows_zero +reduce_product_keep_matrix_cols_zero +reduce_product_keep_vector_zero +reduce_product_keep_matrix_to_scalar_zero_by_zero +reduce_product_keep_3d_eliminate_zero_dim +reduce_product_keep_to_scalar_int8 +reduce_min_to_scalar_int8 +reduce_min_matrix_rows_zero +reduce_min_matrix_cols_zero +reduce_min_vector_zero +reduce_min_matrix_to_scalar_zero_by_zero +reduce_min_3d_eliminate_zero_dim +reduce_min_keep_to_scalar_int8 +reduce_min_keep_matrix_rows_zero +reduce_min_keep_matrix_cols_zero +reduce_min_keep_vector_zero +reduce_min_keep_matrix_to_scalar_zero_by_zero +reduce_min_keep_3d_eliminate_zero_dim +reduce_mean_to_scalar_int8 +reduce_mean_matrix_rows_int32 +reduce_mean_keep_to_scalar_int8 +reduce_mean_keep_matrix_rows_int32 +reduce_max_to_scalar_int8 +reduce_max_matrix_rows_zero +reduce_max_matrix_rows_zero_int32 +reduce_max_matrix_cols_zero +reduce_max_vector_zero +reduce_max_matrix_to_scalar_zero_by_zero +reduce_max_3d_to_scalar_double +reduce_max_3d_eliminate_zero_dim 
+reduce_max_keep_to_scalar_int8 +reduce_max_keep_matrix_rows_zero +reduce_max_keep_matrix_rows_zero_int32 +reduce_max_keep_matrix_cols_zero +reduce_max_keep_vector_zero +reduce_max_keep_matrix_to_scalar_zero_by_zero +reduce_max_keep_3d_to_scalar_double +reduce_max_keep_3d_eliminate_zero_dim # Incorrect precision f64! sum_trivial_in_double @@ -1194,7 +1272,7 @@ shape_of_matrix_v3 shape_of_5d_v0 shape_of_5d_v3 -# Need use evaluate, only applicable to INTERPRETER +# Need use evaluate, only applicable to INTERPRETER non_zero non_zero_all_1s non_zero_all_0s @@ -1239,6 +1317,33 @@ IE_CPU.backwards_log # Unsupported op detected IE_CPU.backwards_batchmatmultranspose_tensor2_tensor2 IE_CPU.fuse_batch_mat_mul_transpose_forward +IE_CPU.round_int64 + +# Can't convert type f16 to IE Precision! +IE_CPU.fused_clamp_float16 + +# [NOT_IMPLEMENTED] Input image format BF16 is not supported yet... +IE_CPU.fused_clamp_bfloat16 + +# Operations were removed from opset +IE_CPU.atanh +IE_CPU.asinh +IE_CPU.acosh + +# Dynamic backend wrapper stops being used for IE +IE_CPU.onnx_dyn_shapes_model_acosh_1_3 +IE_CPU.onnx_dyn_shapes_model_acosh_3_2 +IE_CPU.onnx_dyn_shapes_model_asinh_1_3 +IE_CPU.onnx_dyn_shapes_model_asinh_3_2 +IE_CPU.onnx_dyn_shapes_model_atanh_1_3 +IE_CPU.onnx_dyn_shapes_model_atanh_3_2 +IE_CPU.onnx_dyn_shapes_avg_pool_dyn_shape +IE_CPU.onnx_dyn_shapes_max_pool_dyn_shape +IE_CPU.onnx_dyn_shapes_global_avg_pool_dyn_shape +IE_CPU.onnx_dyn_shapes_global_max_pool_dyn_shape +IE_CPU.onnx_dyn_shapes_model_flatten +IE_CPU.onnx_dyn_shapes_slice_10_default_axes +IE_CPU.fused_clamp_float #------------------------------------------------------------------------------- # diff --git a/ngraph/test/runtime/interpreter/CMakeLists.txt b/ngraph/test/runtime/interpreter/CMakeLists.txt index 38459b16516..ee3c0c8b431 100644 --- a/ngraph/test/runtime/interpreter/CMakeLists.txt +++ b/ngraph/test/runtime/interpreter/CMakeLists.txt @@ -28,5 +28,6 @@ if (NGRAPH_INTERPRETER_ENABLE) install(TARGETS interpreter_backend LIBRARY DESTINATION "${NGRAPH_INSTALL_LIB}" ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}" + RUNTIME DESTINATION "${NGRAPH_INSTALL_LIB}" ) endif() diff --git a/ngraph/test/runtime/interpreter/unit_test.manifest b/ngraph/test/runtime/interpreter/unit_test.manifest index b55cfcb1d25..90855f7b58c 100644 --- a/ngraph/test/runtime/interpreter/unit_test.manifest +++ b/ngraph/test/runtime/interpreter/unit_test.manifest @@ -9,7 +9,3 @@ INTERPRETER.onnx_top_k_opset_10 reduce_sum_large_1d_to_scalar reduce_sum_keep_large_1d_to_scalar - -#ONNX Flatten with dynamic reshape -onnx_dyn_shapes_flatten_axis -onnx_dyn_shapes_flatten_neg_axis diff --git a/ngraph/test/serialize.cpp b/ngraph/test/serialize.cpp index 4e6d25ee3b5..d5e2dc8133c 100644 --- a/ngraph/test/serialize.cpp +++ b/ngraph/test/serialize.cpp @@ -34,6 +34,7 @@ #include "nlohmann/json.hpp" #include "util/all_close_f.hpp" #include "util/test_tools.hpp" +#include "util/visitor.hpp" using namespace std; using namespace ngraph; @@ -498,6 +499,33 @@ TEST(serialize, tensor_iterator_raw) auto f = make_shared(results, ParameterVector{X, Hinit, WH, WX, bH, WY, bY}); string s = serialize(f); shared_ptr g = deserialize(s); + + ngraph::test::NodeBuilder builder; + // Uncomment to see serialization + // builder.set_print(true); + builder.save_node(tensor_iterator); + auto g_tensor_iterator = as_type_ptr(builder.create()); + ASSERT_TRUE(g_tensor_iterator); + auto& inputs = tensor_iterator->get_input_descriptions(); + auto& g_inputs = g_tensor_iterator->get_input_descriptions(); + 
ASSERT_EQ(inputs.size(), g_inputs.size()); + for (size_t i = 0; i < tensor_iterator->get_input_descriptions().size(); ++i) + { + auto& val = inputs[i]; + auto& g_val = g_inputs[i]; + ASSERT_EQ(val->get_type_info(), g_val->get_type_info()); + ASSERT_EQ(val->m_input_index, g_val->m_input_index); + ASSERT_EQ(val->m_body_parameter_index, g_val->m_body_parameter_index); + } + auto& outputs = tensor_iterator->get_output_descriptions(); + auto& g_outputs = g_tensor_iterator->get_output_descriptions(); + ASSERT_EQ(outputs.size(), g_outputs.size()); + for (size_t i = 0; i < tensor_iterator->get_output_descriptions().size(); ++i) + { + auto& val = outputs[i]; + auto& g_val = g_outputs[i]; + ASSERT_EQ(val->get_type_info(), g_val->get_type_info()); + } } TEST(serialize, tensor_iterator_lstm) diff --git a/ngraph/test/type_prop/broadcast.cpp b/ngraph/test/type_prop/broadcast.cpp index 48aa6899e16..b4102f87d8f 100644 --- a/ngraph/test/type_prop/broadcast.cpp +++ b/ngraph/test/type_prop/broadcast.cpp @@ -699,3 +699,34 @@ TEST(type_prop, broadcast_v3_incorrect_target_shape_2) FAIL() << "Deduced type check failed for unexpected reason"; } } + +TEST(type_prop, broadcast_v3_output_rank_not_deduced) +{ + const auto arg = make_shared(element::f32, PartialShape::dynamic()); + const auto shape = make_shared(element::i64, PartialShape::dynamic(1)); + const auto broadcast_spec = op::BroadcastType::BIDIRECTIONAL; + + const auto broadcast_v3 = make_shared(arg, shape, broadcast_spec); + + ASSERT_TRUE(broadcast_v3->get_output_partial_shape(0).same_scheme(PartialShape::dynamic())); +} + +TEST(type_prop, broadcast_v3_output_rank_deduced_from_arg) +{ + const auto arg = make_shared(element::f32, PartialShape::dynamic(4)); + const auto shape = op::Constant::create(element::i64, {3}, {8, 6, 4}); + const auto broadcast_spec = op::BroadcastType::BIDIRECTIONAL; + + const auto broadcast_v3 = make_shared(arg, shape, broadcast_spec); + ASSERT_TRUE(broadcast_v3->get_output_partial_shape(0).same_scheme(PartialShape::dynamic(4))); +} + +TEST(type_prop, broadcast_v3_output_rank_deduced_from_new_shape_input) +{ + const auto arg = make_shared(element::f32, PartialShape::dynamic(4)); + const auto shape = op::Constant::create(element::i64, {5}, {8, 6, 1, 5, 1}); + const auto broadcast_spec = op::BroadcastType::BIDIRECTIONAL; + + const auto broadcast_v3 = make_shared(arg, shape, broadcast_spec); + ASSERT_TRUE(broadcast_v3->get_output_partial_shape(0).same_scheme(PartialShape::dynamic(5))); +} diff --git a/ngraph/test/util/CMakeLists.txt b/ngraph/test/util/CMakeLists.txt index 74ad10a6e58..32d7321ed0c 100644 --- a/ngraph/test/util/CMakeLists.txt +++ b/ngraph/test/util/CMakeLists.txt @@ -25,6 +25,7 @@ set (SRC test_tools.cpp test_control.cpp test_case.cpp + visitor.hpp provenance_enabler.hpp ) diff --git a/ngraph/test/util/test_case.cpp b/ngraph/test/util/test_case.cpp index 11d91a060e1..372fa353fa3 100644 --- a/ngraph/test/util/test_case.cpp +++ b/ngraph/test/util/test_case.cpp @@ -23,18 +23,23 @@ ngraph::test::NgraphTestCase::NgraphTestCase(const std::shared_ptr& fu const std::string& backend_name, const BackendMode mode) : m_function(function) - , m_backend(ngraph::runtime::Backend::create(backend_name, mode == BackendMode::DYNAMIC)) { if (mode == BackendMode::STATIC) { NGRAPH_CHECK(!m_function->is_dynamic(), "For dynamic function using dynamic backend is expected."); } + + // IE backend test should not be run with dynamic backend wrapper + const bool use_dynamic = + mode == BackendMode::DYNAMIC && backend_name.find("IE") == 
std::string::npos; + + m_backend = ngraph::runtime::Backend::create(backend_name, use_dynamic); m_executable = m_backend->compile(m_function); for (auto i = 0; i < m_function->get_output_size(); ++i) { const auto& output_tensor = - (mode == BackendMode::DYNAMIC) + (use_dynamic) ? m_backend->create_dynamic_tensor(m_function->get_output_element_type(i), m_function->get_output_partial_shape(i)) : m_backend->create_tensor(m_function->get_output_element_type(i), diff --git a/ngraph/test/util/visitor.hpp b/ngraph/test/util/visitor.hpp new file mode 100644 index 00000000000..f9a01cd07c6 --- /dev/null +++ b/ngraph/test/util/visitor.hpp @@ -0,0 +1,381 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#pragma once + +#include +#include +#include + +#include "ngraph/attribute_visitor.hpp" +#include "ngraph/factory.hpp" +#include "ngraph/runtime/host_tensor.hpp" + +namespace ngraph +{ + namespace test + { + class ValueHolder + { + template + T& invalid() + { + NGRAPH_CHECK(false, "Invalid type access"); + } + + public: + virtual ~ValueHolder() {} + virtual operator bool&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator float&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator double&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::string&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator int8_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator int16_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator int32_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator int64_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator uint8_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator uint16_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator uint32_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator uint64_t&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() + { + NGRAPH_CHECK(false, "Invalid type access"); + } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() { NGRAPH_CHECK(false, "Invalid type access"); } + virtual operator std::vector&() + { + NGRAPH_CHECK(false, "Invalid type access"); + } + virtual operator std::vector&() + { + NGRAPH_CHECK(false, "Invalid type access"); + } + 
virtual operator std::vector&() + { + NGRAPH_CHECK(false, "Invalid type access"); + } + virtual operator HostTensorPtr&() { NGRAPH_CHECK(false, "Invalid type access"); } + uint64_t get_index() { return m_index; } + protected: + uint64_t m_index{0}; + }; + + template + class ValueHolderImp : public ValueHolder + { + public: + ValueHolderImp(const T& value, uint64_t index) + : m_value(value) + { + m_index = index; + } + operator T&() override { return m_value; } + protected: + T m_value; + }; + + class ValueMap + { + using map_type = std::unordered_map>; + + public: + /// \brief Set to print serialization information + void set_print(bool value) { m_print = value; } + template + void insert(const std::string& name, const T& value) + { + std::pair result = m_values.insert(map_type::value_type( + name, std::make_shared>(value, m_write_count++))); + NGRAPH_CHECK(result.second, name, " is already in use"); + } + template + void insert_scalar(const std::string& name, const T& value) + { + std::pair result = m_values.insert(map_type::value_type( + name, std::make_shared>(value, m_write_count++))); + NGRAPH_CHECK(result.second, name, " is already in use"); + if (m_print) + { + std::cerr << "SER: " << name << " = " << value << std::endl; + } + } + template + void insert_vector(const std::string& name, const T& value) + { + std::pair result = m_values.insert(map_type::value_type( + name, std::make_shared>(value, m_write_count++))); + NGRAPH_CHECK(result.second, name, " is already in use"); + if (m_print) + { + std::cerr << "SER: " << name << " = ["; + std::string comma = ""; + for (auto val : value) + { + std::cerr << comma << val; + comma = ", "; + } + std::cerr << "]" << std::endl; + } + } + template + T& get(const std::string& name) + { + auto& value_holder = *m_values.at(name); + NGRAPH_CHECK(m_read_count++ == value_holder.get_index()); + return static_cast(*m_values.at(name)); + } + + protected: + map_type m_values; + uint64_t m_write_count{0}; + uint64_t m_read_count{0}; + bool m_print{false}; + }; + + class DeserializeAttributeVisitor : public AttributeVisitor + { + public: + DeserializeAttributeVisitor(ValueMap& value_map) + : m_values(value_map) + { + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be unmarshalled"); + } + // The remaining adapter methods fall back on the void adapter if not implemented + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + adapter.set(m_values.get(name)); + }; + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + adapter.set(m_values.get(name)); + }; + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + adapter.set(m_values.get(name)); + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + adapter.set(m_values.get(name)); + } + + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const 
std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + adapter.set(m_values.get>(name)); + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + HostTensorPtr& data = m_values.get(name); + data->read(adapter.get_ptr(), adapter.size()); + } + + protected: + ValueMap& m_values; + }; + + class SerializeAttributeVisitor : public AttributeVisitor + { + public: + SerializeAttributeVisitor(ValueMap& value_map) + : m_values(value_map) + { + } + + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + NGRAPH_CHECK(false, "Attribute \"", name, "\" cannot be marshalled"); + } + // The remaining adapter methods fall back on the void adapter if not implemented + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + m_values.insert_scalar(name, adapter.get()); + }; + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + m_values.insert_scalar(name, adapter.get()); + }; + + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + m_values.insert_scalar(name, adapter.get()); + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + m_values.insert_scalar(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, + ValueAccessor>& adapter) override + { + m_values.insert_vector(name, adapter.get()); + } + void on_adapter(const std::string& name, ValueAccessor& adapter) override + { + HostTensorPtr data = + std::make_shared(element::u8, Shape{adapter.size()}); + data->write(adapter.get_ptr(), adapter.size()); + 
m_values.insert(name, data);
+            }
+
+        protected:
+            ValueMap& m_values;
+        };
+
+        class NodeBuilder : public ValueMap, public DeserializeAttributeVisitor
+        {
+        public:
+            NodeBuilder()
+                : DeserializeAttributeVisitor(static_cast<ValueMap&>(*this))
+                , m_serializer(*this)
+            {
+            }
+
+            NodeBuilder(const std::shared_ptr<Node>& node)
+                : DeserializeAttributeVisitor(static_cast<ValueMap&>(*this))
+                , m_serializer(*this)
+            {
+                save_node(node);
+            }
+
+            void save_node(std::shared_ptr<Node> node)
+            {
+                m_node_type_info = node->get_type_info();
+                node->visit_attributes(m_serializer);
+            }
+
+            // Does not validate, since inputs aren't set
+            std::shared_ptr<Node> create()
+            {
+                std::shared_ptr<Node> node(FactoryRegistry<Node>::get().create(m_node_type_info));
+                node->visit_attributes(*this);
+                return node;
+            }
+            AttributeVisitor& get_node_saver() { return m_serializer; }
+            AttributeVisitor& get_node_loader() { return *this; }
+        protected:
+            Node::type_info_t m_node_type_info;
+            SerializeAttributeVisitor m_serializer;
+        };
+    }
+}
diff --git a/tests/stress_tests/.gitignore b/tests/stress_tests/.gitignore
index 228d3af5afa..25131b99402 100644
--- a/tests/stress_tests/.gitignore
+++ b/tests/stress_tests/.gitignore
@@ -1,2 +1,2 @@
 # Name of virtualenv created by stress_tests/scripts/get_testdata.py
-.stress_venv
+.stress_venv
\ No newline at end of file
diff --git a/tests/stress_tests/common/utils.cpp b/tests/stress_tests/common/utils.cpp
index 56d5f717205..3d47da38c72 100644
--- a/tests/stress_tests/common/utils.cpp
+++ b/tests/stress_tests/common/utils.cpp
@@ -43,7 +43,7 @@ size_t getVmSizeInKB() {
         return pmc.WorkingSetSize;
 }
 #else
-size_t getVirtualMemoryInKB(char *name){
+size_t getSystemDataByName(char *name){
     FILE* file = fopen("/proc/self/status", "r");
     size_t result = 0;
     if (file != nullptr) {
@@ -60,10 +60,11 @@ size_t getVirtualMemoryInKB(char *name){
     return result;
 }
 
-size_t getVmSizeInKB() {return getVirtualMemoryInKB((char*) "VmSize:");}
-size_t getVmPeakInKB() {return getVirtualMemoryInKB((char*) "VmPeak:");}
-size_t getVmRSSInKB() {return getVirtualMemoryInKB((char*) "VmRSS:");}
-size_t getVmHWMInKB() {return getVirtualMemoryInKB((char*) "VmHWM:");}
+size_t getVmSizeInKB() {return getSystemDataByName((char*) "VmSize:");}
+size_t getVmPeakInKB() {return getSystemDataByName((char*) "VmPeak:");}
+size_t getVmRSSInKB() {return getSystemDataByName((char*) "VmRSS:");}
+size_t getVmHWMInKB() {return getSystemDataByName((char*) "VmHWM:");}
+size_t getThreadsNum() {return getSystemDataByName((char*) "Threads:");}
 
 #endif
 
diff --git a/tests/stress_tests/common/utils.h b/tests/stress_tests/common/utils.h
index 5eb9448c539..759f8d8213f 100644
--- a/tests/stress_tests/common/utils.h
+++ b/tests/stress_tests/common/utils.h
@@ -37,6 +37,7 @@ size_t getVmSizeInKB();
 size_t getVmPeakInKB();
 size_t getVmRSSInKB();
 size_t getVmHWMInKB();
+size_t getThreadsNum();
 
 template <typename Function, typename... Args>
 int run_in_processes(const int &numprocesses, Function const &function, Args ... 
args) { diff --git a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp index 55671f95463..44182113b5e 100644 --- a/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp +++ b/tests/stress_tests/memleaks_tests/tests_pipelines/tests_pipelines.cpp @@ -28,7 +28,7 @@ using namespace InferenceEngine; #define THRESHOLD 0.1 // Measure values -enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, MeasureValueMax }; +enum MeasureValue { VMRSS = 0, VMHWM, VMSIZE, VMPEAK, THREADS, MeasureValueMax }; namespace util { template @@ -58,16 +58,17 @@ TestResult common_test_pipeline(const std::function& test_pipeline, cons past.resize(std::min(n / 2, MAX_AVERAGE)); log_info("Warming up for " << WARMUP_STEPS << " iterations"); - log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK"); + log_info("i\tVMRSS\tVMHWM\tVMSIZE\tVMPEAK\tTHREADS"); int measure_count = n; for (size_t iteration = 0; measure_count > 0; iteration++) { // Warm up to take reference values test_pipeline(); getVmValues(cur[VMSIZE], cur[VMPEAK], cur[VMRSS], cur[VMHWM]); + cur[THREADS] = getThreadsNum(); past[iteration % past.size()] = cur; progress_str = std::to_string(iteration + 1) + "\t" + std::to_string(cur[VMRSS]) + "\t" + std::to_string(cur[VMHWM]) + "\t" + std::to_string(cur[VMSIZE]) + "\t" + - std::to_string(cur[VMPEAK]); + std::to_string(cur[VMPEAK]) + "\t" + std::to_string(cur[THREADS]); // measure if (iteration >= WARMUP_STEPS) { diff --git a/tests/stress_tests/scripts/memcheck-template/base.html b/tests/stress_tests/scripts/memcheck-template/base.html new file mode 100644 index 00000000000..f34958e832c --- /dev/null +++ b/tests/stress_tests/scripts/memcheck-template/base.html @@ -0,0 +1,15 @@ + + + + {% block head %} + {% block title %}{% endblock %} + {% endblock %} + + +
{% block content %}{% endblock %}
+ + + diff --git a/tests/stress_tests/scripts/memcheck-template/timeline_report.html b/tests/stress_tests/scripts/memcheck-template/timeline_report.html new file mode 100644 index 00000000000..e5674021119 --- /dev/null +++ b/tests/stress_tests/scripts/memcheck-template/timeline_report.html @@ -0,0 +1,128 @@ +{% extends "base.html" %} +{% block title %}Memcheck report{% endblock %} +{% block head %} + {{ super() }} + + + + +{% endblock %} +{% block content %} +
+

Memcheck report

+ {% for timeline in timelines %} + + {% endfor %} +
+ +{% endblock %} diff --git a/tests/stress_tests/scripts/memcheck_upload.py b/tests/stress_tests/scripts/memcheck_upload.py index 891ded10473..d23bc4d5277 100644 --- a/tests/stress_tests/scripts/memcheck_upload.py +++ b/tests/stress_tests/scripts/memcheck_upload.py @@ -17,6 +17,7 @@ import os import re import sys import argparse +from inspect import getsourcefile from glob import glob import xml.etree.ElementTree as ET import hashlib @@ -36,14 +37,11 @@ PRECISSIONS = ('FP32', 'FP16', 'INT8') KEY_FIELDS = ('test_name', 'model', 'device', 'build_url') -def globber(paths): - """Generator extending paths with wildcards""" - for path in paths: - if any(magic in path for magic in ['*', '?', '!', '[', ']']): - for resolved in glob(path, recursive=True): - yield resolved - else: - yield path +def abs_path(relative_path): + """Return absolute path given path relative to the current file. + """ + return os.path.realpath( + os.path.join(os.path.dirname(getsourcefile(lambda: 0)), relative_path)) def parse_memcheck_log(log_path): @@ -118,6 +116,79 @@ def upload_memcheck_records(records, db_url, db_collection): collection.replace_one({'_id': record['_id']}, record, upsert=True) +def _transpose_dicts(items, template=None): + """ Build dictionary of arrays from array of dictionaries + Example: + > in = [{'a':1, 'b':3}, {'a':2}] + > _transpose_dicts(in, template=in[0]) + {'a':[1,2], 'b':[3, None]} + """ + result = {} + if not items: + return result + if not template: + template = items[0] + for key, template_val in template.items(): + if isinstance(template_val, dict): + result[key] = _transpose_dicts( + [item[key] for item in items if key in item], template_val) + else: + result[key] = [item.get(key, None) for item in items] + return result + + +TIMELINE_SIMILARITY = ('test_name', 'model', 'device', 'target_branch') + + +def query_timeline(records, db_url, db_collection, max_items=20, similarity=TIMELINE_SIMILARITY): + """ Query database for similar memcheck items committed previously + """ + client = MongoClient(db_url) + collection = client[DATABASE][db_collection] + result = [] + for record in records: + query = dict((key, record[key]) for key in similarity) + query['commit_date'] = {'$lt': record['commit_date']} + pipeline = [ + {'$match': query}, + {'$addFields': {'commit_date': {'$dateFromString': {'dateString': '$commit_date'}}}}, + {'$sort': {'commit_date': -1}}, + {'$limit': max_items}, + {'$sort': {'commit_date': 1}}, + ] + items = list(collection.aggregate(pipeline)) + [record] + timeline = _transpose_dicts(items, template=record) + result += [timeline] + return result + + +def create_memcheck_report(records, db_url, db_collection, output_path): + """ Create memcheck timeline HTML report for records. 
+ """ + if db_collection == 'pre_commit': + db_collection = 'commit' # pre-commit jobs building report from past commits + records.sort( + key=lambda item: f"{item['status']}{item['device']}{item['model']}{item['test_name']}") + timelines = query_timeline(records, db_url, db_collection) + import jinja2 # pylint: disable=import-outside-toplevel + env = jinja2.Environment( + loader=jinja2.FileSystemLoader( + searchpath=os.path.join(abs_path('.'), 'memcheck-template')), + autoescape=False) + template = env.get_template('timeline_report.html') + template.stream(records=records, timelines=timelines).dump(output_path) + + +def globber(paths): + """Generator extending paths with wildcards""" + for path in paths: + if any(magic in path for magic in ['*', '?', '!', '[', ']']): + for resolved in glob(path, recursive=True): + yield resolved + else: + yield path + + def main(): """Main entry point. """ diff --git a/tests/stress_tests/scripts/requirements.txt b/tests/stress_tests/scripts/requirements.txt index 8c7d698b2b7..79e8313d653 100644 --- a/tests/stress_tests/scripts/requirements.txt +++ b/tests/stress_tests/scripts/requirements.txt @@ -1 +1,2 @@ -pymongo \ No newline at end of file +pymongo +Jinja2 \ No newline at end of file
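Editor's note: the central new piece in this change set is the NodeBuilder helper in ngraph/test/util/visitor.hpp, which records a node's attributes through the serializing visitor and can then rebuild a node of the same type from the recorded values. The following stand-alone sketch (not part of the diff) illustrates the intended round-trip usage. The choice of op::v1::Softmax, the include paths, and the assumption that the op type is registered in FactoryRegistry<Node> are illustrative, not taken from the patch itself.

// Sketch of an attribute round-trip test built on the new NodeBuilder helper.
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/visitor.hpp" // assumed include path within the ngraph test tree

using namespace ngraph;
using ngraph::test::NodeBuilder;

TEST(attributes, softmax_round_trip_sketch)
{
    // Build a node whose attributes should survive serialization.
    const auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
    const auto softmax = std::make_shared<op::v1::Softmax>(data, /*axis=*/1);

    // Record the node's type and attributes into the builder's ValueMap.
    NodeBuilder builder(softmax);

    // Re-create a node of the same type from the stored values.
    // create() does not validate, because no inputs are attached;
    // this assumes op::v1::Softmax is available in FactoryRegistry<Node>.
    const auto g_softmax = as_type_ptr<op::v1::Softmax>(builder.create());

    EXPECT_EQ(g_softmax->get_axis(), softmax->get_axis());
}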