diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml
index f45f4e410c6..146775f6189 100644
--- a/.ci/azure/linux.yml
+++ b/.ci/azure/linux.yml
@@ -112,6 +112,7 @@ jobs:
         -DNGRAPH_ONNX_IMPORT_ENABLE=ON
         -DNGRAPH_ONNX_EDITOR_ENABLE=ON
         -DENABLE_FASTER_BUILD=ON
+        -DENABLE_STRICT_DEPENDENCIES=OFF
         -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
         $(REPO_DIR)
     workingDirectory: $(BUILD_DIR)
diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml
index 680ef281ac2..04d4c16ea23 100644
--- a/.ci/azure/mac.yml
+++ b/.ci/azure/mac.yml
@@ -90,7 +90,7 @@ jobs:
     # Disable errors with Ninja
     export CXXFLAGS="-Wno-error=unused-command-line-argument"
     export CFLAGS="-Wno-error=unused-command-line-argument"
-    cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR)
+    cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR)
   workingDirectory: $(BUILD_DIR)
   displayName: 'CMake'
diff --git a/.ci/azure/windows.yml b/.ci/azure/windows.yml
index 6b4e5203dd0..21a36392e33 100644
--- a/.ci/azure/windows.yml
+++ b/.ci/azure/windows.yml
@@ -92,7 +92,7 @@ jobs:
 - script: |
     set PATH=$(WORK_DIR)\ninja-win;%PATH%
-    call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
+    call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
   workingDirectory: $(BUILD_DIR)
   displayName: 'CMake'
diff --git a/cmake/features.cmake b/cmake/features.cmake
index 7518c99c868..aff805adb15 100644
--- a/cmake/features.cmake
+++ b/cmake/features.cmake
@@ -6,7 +6,7 @@ ie_dependent_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON "X8
 ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
-ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" OFF)
+ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON)
 ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
diff --git a/docs/IE_DG/Intro_to_Performance.md b/docs/IE_DG/Intro_to_Performance.md
index 94d0173dbbe..0c9457ed4bf 100644
--- a/docs/IE_DG/Intro_to_Performance.md
+++ b/docs/IE_DG/Intro_to_Performance.md
@@ -22,7 +22,8 @@ $ benchmark_app -m <model> -enforcebf16=false
 Notice that for quantized (e.g. INT8) models the bfloat16 calculations (of the layers that remain in FP32) is disabled by default. Refer to the [CPU Plugin documentation](supported_plugins/CPU.md) for more details.
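The documentation hunk above references the `-enforcebf16=false` switch of benchmark_app. As an illustrative aside (not part of this patch), the same behavior can be requested programmatically through the plugin configuration. This is a minimal sketch assuming the 2021.x `InferenceEngine::Core` API and the `KEY_ENFORCE_BF16` key from `ie_plugin_config.hpp`; the model path is a placeholder:

```cpp
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core core;

    // Keep non-quantized layers in FP32 instead of bfloat16 on CPUs with native bf16 support;
    // this mirrors the effect of `benchmark_app -enforcebf16=false`.
    core.SetConfig({{CONFIG_KEY(ENFORCE_BF16), CONFIG_VALUE(NO)}}, "CPU");

    auto network = core.ReadNetwork("model.xml");  // placeholder path, not taken from this patch
    auto executableNetwork = core.LoadNetwork(network, "CPU");
    return 0;
}
```

The same key can alternatively be passed per network through the config argument of `Core::LoadNetwork`.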
-Similarly, the GPU device has a dedicated config key to enable FP16 execution of the layers that remain in FP32 in the quantized models (as the quantization is typically performed on the FP32 models), refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/GPU.md)
+Similarly, when a quantized model is produced from an FP16 original, the GPU device automatically runs the layers that remain unquantized in FP16 precision.
+Refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/GPU.md).

 ## Latency vs. Throughput
 One way to increase computational efficiency is batching, which combines many (potentially tens) of
diff --git a/inference-engine/include/ie_version.hpp b/inference-engine/include/ie_version.hpp
index 13215d0b68d..10e649a09d3 100644
--- a/inference-engine/include/ie_version.hpp
+++ b/inference-engine/include/ie_version.hpp
@@ -20,8 +20,8 @@
  * @brief Defines Inference Engine patch version
  */
-#define IE_VERSION_MAJOR 2021
-#define IE_VERSION_MINOR 4
+#define IE_VERSION_MAJOR 2022
+#define IE_VERSION_MINOR 1
 #define IE_VERSION_PATCH 0

 #include "ie_api.h"
diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp
index 9b0eccaea59..3a283cae895 100644
--- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp
+++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp
@@ -10,13 +10,18 @@
 namespace GNAPluginNS {
 namespace GNALimitations {

+constexpr uint32_t bufferMaxSize = 65528;
+
 constexpr uint32_t convMinFiltersNum = 4;
 constexpr uint32_t convMaxFiltersNum = 65532;
 constexpr uint32_t convFiltersNumDivider = 4;
+constexpr uint32_t convFilterMaxSize = 768;
 constexpr uint32_t convEachKernelByteAlignment = 16;
 constexpr uint32_t noOfInputsDivisor = 8;
 constexpr uint32_t noOfInputsLowPrecDivisor = 16;

+constexpr uint32_t affineMaxBatchSize = 8;
+
 namespace Cnn2D {
 struct RangeLimit {
     uint32_t min;
diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index 3c1fdaac0e7..11f13a7a9ac 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -370,14 +370,8 @@ class ScaleFactorPerLayer {
             auto minOutValue = quantizedParams->_dst_quant.GetMinValues().front();
             auto maxOutValue = quantizedParams->_dst_quant.GetMaxValues().front();
             auto absMax = std::max(std::abs(minOutValue), std::abs(maxOutValue));
-            auto absMin = std::min(std::abs(minOutValue), std::abs(maxOutValue));

             result = (quantizedParams->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue);
-            if (0 && fp32eq(absMin, 0.0f) && !fp32eq(absMax, 0.0f)) {
-                result = (quantizedParams->_dst_quant.GetLevels() - 1) / (2 * absMax);
-            }
-            //
-            //result = MAX_VAL_2B_FEAT / absMax;
             if (std::isinf(result) || fp32eq(absMax, 0.0f)) {
                 result = max_activation_scale_factor;
             }
@@ -401,6 +395,7 @@ class ScaleFactorPerLayer {
                 (layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) {
                 auto prevLayerQuant = InferenceEngine::getInjectedData(*prevLayer);
                 if (!fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) &&
+                    prevLayerQuant->_src_quant.IsStatsSet() &&
                     (prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) {
                     result = prevLayerQuant->_src_quant.GetScale();
                     usePrevScaleFactor = true;
diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 23685b4734f..bf44e437af0 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -158,25 +158,27 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect to functional layer"; } - auto dataOutput = outFunctionalLayer.first->insData[outFunctionalLayer.second].lock(); + for (int idx : outFunctionalLayer.second) { + auto dataOutput = outFunctionalLayer.first->insData[idx].lock(); - padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize()) - * dataOutput->getPrecision().size(); - output_layer_size = - InferenceEngine::details::product(begin(dataOutput->getDims()), - end(dataOutput->getDims())) * dataOutput->getPrecision().size(); + padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize()) + * dataOutput->getPrecision().size(); + output_layer_size = + InferenceEngine::details::product(begin(dataOutput->getDims()), + end(dataOutput->getDims())) * dataOutput->getPrecision().size(); - if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) { - size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset"); - layerInfoItem.splitOutputLayers.emplace_back( - outFunctionalLayer.first, - outFunctionalLayer.second, - aligned64_offset * dataOutput->getPrecision().size(), - output_layer_size); - } else { - layerInfoItem.splitOutputLayers.emplace_back( - outFunctionalLayer.first, outFunctionalLayer.second, split_size, output_layer_size); - } + if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) { + size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset"); + layerInfoItem.splitOutputLayers.emplace_back( + outFunctionalLayer.first, + idx, + aligned64_offset * dataOutput->getPrecision().size(), + output_layer_size); + } else { + layerInfoItem.splitOutputLayers.emplace_back( + outFunctionalLayer.first, idx, split_size, output_layer_size); + } + } } // in case of unconnected split - we need properly increment size diff --git a/inference-engine/src/gna_plugin/gna_graph_tools.hpp b/inference-engine/src/gna_plugin/gna_graph_tools.hpp index e9cf70790ac..51701268209 100644 --- a/inference-engine/src/gna_plugin/gna_graph_tools.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_tools.hpp @@ -155,14 +155,14 @@ inline InferenceEngine::CNNLayerPtr CNNNetPrevLayerSkipCertain(Layer layer, int */ template -inline std::pair CNNNetCheckNextLayerSkipCertain(Layer layer, int oidx, int iidx, bool bOnlyCheck, +inline std::pair> CNNNetCheckNextLayerSkipCertain(Layer layer, int oidx, int iidx, bool bOnlyCheck, const std::function &shouldSkip) { if (oidx >= layer->outData.size()) { - if (bOnlyCheck) return {nullptr, 0}; + if (bOnlyCheck) return {nullptr, {}}; THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx; } if (getInputTo(layer->outData[oidx]).empty() || iidx >= getInputTo(layer->outData[oidx]).size()) { - if (bOnlyCheck) return {nullptr, 0}; + if (bOnlyCheck) return {nullptr, {}}; THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx << " and inputTo index: " << iidx; } @@ -174,12 +174,12 @@ inline std::pair CNNNetCheckNextLayerSkipCer while (shouldSkip(outLayer->second)) { if (outLayer->second->outData.size() <= new_oidx) { - if (bOnlyCheck) return { nullptr, 0 }; + if (bOnlyCheck) return { nullptr, {} }; 
THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx; } if (getInputTo(outLayer->second->outData[new_oidx]).size() <= new_iidx) { - if (bOnlyCheck) return { nullptr, 0 }; + if (bOnlyCheck) return { nullptr, {} }; THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx << " and inputTo index: " << new_iidx; } @@ -188,11 +188,7 @@ inline std::pair CNNNetCheckNextLayerSkipCer } auto insDataIdx = CNNLayerFindInsDataIdxes(layer->outData[new_oidx], outLayer->second); - if (insDataIdx.size() != 1) { - if (bOnlyCheck) return { nullptr, 0 }; - THROW_GNA_LAYER_EXCEPTION(layer) << " has multiple connection to " << new_oidx << " outData"; - } - return { outLayer->second, insDataIdx.front() }; + return { outLayer->second, insDataIdx }; } /** @@ -256,7 +252,7 @@ inline std::pair CNNNetCheckNextLayerSkipCer /// @brief alias for strict checkNextLayer (false) template -inline std::pair CNNNetGetNextLayerSkipCertain(Layer layer, int oidx, int iidx, +inline std::pair> CNNNetGetNextLayerSkipCertain(Layer layer, int oidx, int iidx, const std::function &shouldSkip) { return CNNNetCheckNextLayerSkipCertain(layer, oidx, iidx, false, shouldSkip); } diff --git a/inference-engine/src/gna_plugin/gna_groups.hpp b/inference-engine/src/gna_plugin/gna_groups.hpp index 21abe5d0124..2449338821c 100644 --- a/inference-engine/src/gna_plugin/gna_groups.hpp +++ b/inference-engine/src/gna_plugin/gna_groups.hpp @@ -46,14 +46,10 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input * @param layer */ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { - if (GNAPluginNS::LayerInfo(layer).isPower()) + if (GNAPluginNS::LayerInfo(layer).isPower() || GNAPluginNS::LayerInfo(layer).isCopy()) return true; - if (!GNAPluginNS::LayerInfo(layer).isScaleShift()) - return false; - - // Don't reshape user-defined ScaleShift layers - if (layer->name.rfind("SyntheticScaleShift", 0) == std::string::npos) + if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift()) return false; // Don't reshape the first dnn layer since it breaks groups recognition @@ -61,8 +57,7 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { return LayerInfo(ptr).isNonValuesChangable(); }); IE_ASSERT(prevLayer != nullptr); - if (LayerInfo(prevLayer).isInput()) - return false; + if (LayerInfo(prevLayer).isInput()) return false; // Don't reshape diagonallayers with bias connection return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp index fdb99d7f273..e32ded8a9e3 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.cpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #endif @@ -133,10 +134,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream & } case 5: case 6: + case 7: readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is); break; default: - THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 4 and is: " << header.version.minor; + THROW_GNA_EXCEPTION << "Imported file unsupported. 
minor version should have values in range 1 to 7 and is: " << header.version.minor; } break; default: @@ -154,6 +156,40 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream & return header; } +GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) { + is.exceptions(std::istream::failbit); + + HeaderLatest::RuntimeEndPoint endPoint; + switch (modelHeader.version.major) { + case 2: + switch (modelHeader.version.minor) { + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + { + Header2dot6::RuntimeEndPoint tempEndPoint2dot6; + readBits(tempEndPoint2dot6, is); + endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, modelHeader.nGroup); + break; + } + case 7: + readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is); + break; + default: + THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << modelHeader.version.minor; + } + break; + default: + THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: " + << modelHeader.version.major << " is not implemented"; + } + + return endPoint; +} + #define offsetFromBase(field)\ getOffsetFromBase(field, #field) @@ -324,18 +360,6 @@ void GNAModelSerial::Import(void *basePointer, is.read(reinterpret_cast(basePointer), gnaGraphSize); } - -uint32_t guessGrouping(Gna2Model const& model) { - if (model.NumberOfOperations == 0 || - model.Operations == nullptr || - model.Operations[0].Operands == nullptr || - model.Operations[0].NumberOfOperands == 0 || - model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) { - THROW_GNA_EXCEPTION << "Can not guess grouping"; - } - return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]); -} - void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const { os.exceptions(std::ostream::failbit); @@ -366,6 +390,9 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea out.descriptor_offset = offsetFromBase(ep.descriptor_ptr); out.scaleFactor = ep.scaleFactor; out.element_size = ep.element_size; + out.shape = ep.shape; + out.layout = ep.layout; + out.precision = ep.precision; out.orientation = ep.orientation; return out; }; @@ -381,7 +408,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea header.headerSize = sizeof(HeaderLatest::ModelHeader); header.gnaMemSize = gnaGraphSize; header.layersCount = layers.size(); - header.nGroup = guessGrouping(*gna2Model); + header.nGroup = 1; // just to support the old models header.nInputs = inputs.size(); header.nOutputs = outputs.size(); header.nTransposeInputs = transposeInputsInfo.size(); @@ -796,13 +823,22 @@ std::vector GNAModelSerial::serializeOutputs(cons std::size_t outputIndex = 0; for (auto const &output : outputsDataMap) { auto outputName = output.first; - auto inputDims = output.second->getTensorDesc().getDims(); - uint32_t elementsCount = static_cast(InferenceEngine::details::product(inputDims.begin(), inputDims.end())); - + auto outputDims = output.second->getTensorDesc().getDims(); + HeaderLatest::RuntimeEndPoint::Shape outputShape; + outputShape.NumberOfDimensions = outputDims.size(); + for (size_t i=0; i < outputShape.NumberOfDimensions; ++i) { + outputShape.Dimensions[i] = static_cast(outputDims[i]); + } + uint32_t elementsCount = static_cast(InferenceEngine::details::product(outputDims.begin(), outputDims.end())); + 
InferenceEngine::Layout outputLayout = output.second->getLayout(); + InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32; HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor, outputsDesc[outputIndex].ptrs[0], outputsDesc[outputIndex].num_bytes_per_element, elementsCount, + outputShape, + outputLayout, + outputPrecision, outputsDesc[outputIndex].orientation); endPoints.push_back(endPoint); outputIndex++; @@ -818,18 +854,26 @@ std::vector GNAModelSerial::serializeInputs(const for (auto const& input : inputsDataMap) { auto inputName = input.first; auto inputDims = input.second->getTensorDesc().getDims(); - + HeaderLatest::RuntimeEndPoint::Shape inputShape; + inputShape.NumberOfDimensions = inputDims.size(); + for (size_t i=0; i < inputShape.NumberOfDimensions; ++i) { + inputShape.Dimensions[i] = static_cast(inputDims[i]); + } double scaleFactor = inputDesc->getScaleFactor(inputIndex); std::vector descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName); IE_ASSERT(descriptor_ptr.size() > 0); uint32_t element_size = 2u; uint32_t elementsCount = static_cast(InferenceEngine::details::product(inputDims.begin(), inputDims.end())); intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName); - + InferenceEngine::Layout inputLayout = input.second->getLayout(); + InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32; HeaderLatest::RuntimeEndPoint endPoint(scaleFactor, descriptor_ptr[0], element_size, elementsCount, + inputShape, + inputLayout, + inputPrecision, orientation); endPoints.push_back(endPoint); inputIndex++; @@ -846,20 +890,24 @@ void GNAModelSerial::ImportInputs(std::istream &is, for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) { const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3) ? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex)); - HeaderLatest::RuntimeEndPoint input; - is.read(reinterpret_cast(&input), sizeof(input)); + + HeaderLatest::RuntimeEndPoint input = ReadEndPoint(is); inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast(reinterpret_cast (basePtr) + input.descriptor_offset)); inputsDesc->orientation_in[name] = input.orientation; inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count; - auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup}); - + auto inputDims = InferenceEngine::SizeVector(); + for (auto i = 0; i < input.shape.NumberOfDimensions; ++i) { + inputDims.push_back(input.shape.Dimensions[i]); + } + InferenceEngine::Layout inputLayout = static_cast(input.layout); + InferenceEngine::Precision inputPresicion = InferenceEngine::Precision(static_cast(input.precision)); dataMap[name] = std::make_shared(); dataMap[name]->setInputData(std::make_shared(name, InferenceEngine::TensorDesc( - InferenceEngine::Precision::FP32, + inputPresicion, inputDims, - InferenceEngine::Layout::NC))); + inputLayout))); inputsDesc->inputScaleFactors.push_back(input.scaleFactor); } } @@ -875,8 +923,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is, for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) { const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3) ? 
outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex)); - HeaderLatest::RuntimeEndPoint output; - is.read(reinterpret_cast(&output), sizeof(output)); + + HeaderLatest::RuntimeEndPoint output = ReadEndPoint(is); OutputDesc description; description.ptrs.push_back(reinterpret_cast(reinterpret_cast (basePtr) + output.descriptor_offset)); description.orientation = kDnnInterleavedOrientation; @@ -884,12 +932,17 @@ void GNAModelSerial::ImportOutputs(std::istream &is, description.num_bytes_per_element = output.element_size; description.scale_factor = output.scaleFactor; - auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup}); + auto outputDims = InferenceEngine::SizeVector(); + for (auto i = 0; i < output.shape.NumberOfDimensions; ++i) { + outputDims.push_back(output.shape.Dimensions[i]); + } + InferenceEngine::Layout outputLayout = static_cast(output.layout); + InferenceEngine::Precision outputPresicion = InferenceEngine::Precision(static_cast(output.precision)); dataMap[name] = std::make_shared(name, InferenceEngine::TensorDesc( - InferenceEngine::Precision::FP32, + outputPresicion, outputDims, - InferenceEngine::Layout::NC)); + outputLayout)); desc.at(outputIndex) = description; } } diff --git a/inference-engine/src/gna_plugin/gna_model_serial.hpp b/inference-engine/src/gna_plugin/gna_model_serial.hpp index d756a23f9fc..f5310d826c4 100644 --- a/inference-engine/src/gna_plugin/gna_model_serial.hpp +++ b/inference-engine/src/gna_plugin/gna_model_serial.hpp @@ -138,6 +138,8 @@ private: */ static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is); + GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is); + /** * @brief Import model from FS into preallocated buffer, * buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index e76eafa6d53..f49d543def1 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -54,12 +54,17 @@ #include #include #include +#include #include "transformations/remove_extra_reshapes.hpp" #include "transformations/insert_transpose_after_convolution_or_pooling.hpp" #include "transformations/insert_transpose_before_matmul.hpp" #include "transformations/reorder_activation_and_pooling.hpp" #include "transformations/swap_input_matmul_gna.hpp" +#include "transformations/convert_matmul_to_pointwise_convolution.hpp" +#include "transformations/split_convolution_with_large_buffer_size.hpp" + +#include #if GNA_LIB_VER == 2 #include @@ -667,6 +672,15 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass manager.register_pass(); manager.register_pass(); + // TODO enable this transformation for networks with convolutions + if (!ngraph::op::util::has_op_with_type(graph)) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -735,6 +749,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); passes->registerPass(); + passes->registerPass(); passes->registerPass(); passes->registerPass(); passes->registerPass(); @@ -753,7 +768,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & 
_network) { passes->registerPass(); passes->registerPass(); - passes->registerPass(); passes->registerPass(); passes->registerPass(); #if GNA_LIB_VER == 2 @@ -1465,7 +1479,11 @@ static InferenceEngine::Layout GetLayoutForDims(const InferenceEngine::SizeVecto Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) { // need to have intermediate blob for interleave conversion InferenceEngine::Blob::Ptr outputBlob; - auto outputDims = outputsDataMap[name]->getTensorDesc().getDims(); + auto outputDataIt = outputsDataMap.find(name); + if (outputDataIt == std::end(outputsDataMap)) { + THROW_GNA_EXCEPTION << "Output " << name << " isn't found"; + } + auto outputDims = outputDataIt->second->getTensorDesc().getDims(); outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, GetLayoutForDims(outputDims))); outputBlob->allocate(); return outputBlob; @@ -1475,7 +1493,11 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec InferenceEngine::Blob::Ptr inputBlob; // need to have intermediate blob for interleave conversion // TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not - auto inputDims = inputsDataMap[name]->getTensorDesc().getDims(); + auto inputDataIt = inputsDataMap.find(name); + if (inputDataIt == std::end(inputsDataMap)) { + THROW_GNA_EXCEPTION << "Input " << name << " isn't found"; + } + auto inputDims = inputDataIt->second->getTensorDesc().getDims(); inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, GetLayoutForDims(inputDims))); inputBlob->allocate(); return inputBlob; diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index 4d3b71b9622..b8962cebd36 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -86,7 +86,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer, }); IE_ASSERT(inputLayer != nullptr); size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ? 
- weightsSize = nextLayer->outData[0]->getDims().back() : + nextLayer->outData[0]->getDims().back() : Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; std::vector weightsValues(weightsSize, fillValue); IE_ASSERT(diagLayer != nullptr); @@ -314,6 +314,7 @@ void HandleMultipleActivationsForTheLayerPass::run() { LayerInfo info(inputTo.second); if (info.isActivation()) { + if (odata->getDims().empty()) continue; if (!activations.empty() && odata->getDims()[0] != 1) { THROW_GNA_EXCEPTION << "Unsupported batch size " << odata->getDims()[0] << " for diagonal layer insertion"; @@ -741,12 +742,17 @@ void RemovePermutationsNHWCToNCHWPass::run() { IE_ASSERT(!input_to.empty()); auto current_layer = input_to.begin()->second; setNHWCOrder(current_layer->input()); - while (current_layer != pattern_end) { - setNHWCOrder(current_layer->outData[0]); - input_to = getInputTo(current_layer->outData[0]); - IE_ASSERT(!input_to.empty()); - current_layer = input_to.begin()->second; - } + std::function propogateNHWCOrderRecursive = + [pattern_end, &propogateNHWCOrderRecursive, &setNHWCOrder](CNNLayerPtr current_layer) { + if (current_layer == pattern_end) return; + for (size_t i = 0; i < current_layer->outData.size(); ++i) { + setNHWCOrder(current_layer->outData[i]); + auto input_to = getInputTo(current_layer->outData[i]); + IE_ASSERT(!input_to.empty()); + propogateNHWCOrderRecursive(input_to.begin()->second); + } + }; + propogateNHWCOrderRecursive(current_layer); if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) { auto layer_before_permute = CNNNetPrevLayer(pattern_start); @@ -1447,21 +1453,19 @@ void EltwiseSplitOverChannelsPass::run() { THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; } auto oData = l->outData.front(); + auto out_width = GetDataDimSize(oData, DataDimName::W); auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo; if (totalElementsForOutput <= maxAffineElements) { continue; } - // TODO: for now lets put split of 2 elements as restrictions auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; - if (totalSplits > 2) { - THROW_GNA_LAYER_EXCEPTION(l) << "split layer over output channels on more than 2 layers unsupported"; - } pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; auto quantized = InferenceEngine::getInjectedData(l); + bool sameInputs = l->insData[0].lock() == l->insData[1].lock(); std::vector splitLayers(2); for (size_t kThEltwiseInput = 0; kThEltwiseInput != 2; kThEltwiseInput++) { // create split layer @@ -1472,31 +1476,38 @@ void EltwiseSplitOverChannelsPass::run() { split->insData.push_back(l->insData[kThEltwiseInput]); auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); - // need to split this desc - if (inputDesc.getLayout() != Layout::NC) { - THROW_GNA_LAYER_EXCEPTION(l) - << "cannot split over channel: input " << std::to_string(kThEltwiseInput) - << " layout need to be NC"; - } // create split layer outputs - for (size_t i = 0;; i++) { - auto elements_num = std::min(totalElementsForOutput - i * maxAffineElements, + size_t usedElements = 0; + for (size_t i = 0; i < totalSplits; i++) { + SizeVector newDims; + size_t elements_num = std::min(totalElementsForOutput - usedElements, static_cast(maxAffineElements)); + if (inputDesc.getDims().size() == 2) { + newDims = SizeVector{1, elements_num}; + 
} else { + elements_num = elements_num - elements_num % out_width; + newDims = SizeVector{1, elements_num / out_width, out_width}; + } - SizeVector newDims = {1, elements_num}; auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto data = std::make_shared(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); getCreatorLayer(data) = split; split->outData.push_back(data); - if (elements_num != maxAffineElements) { + usedElements += elements_num; + if (usedElements == totalElementsForOutput) { break; } } // replacing connection X->eltwise to X->split auto oData = CNNLayerFindOutData(l, kThEltwiseInput); oData.second->second = split; + + if (sameInputs) { + splitLayers[1] = splitLayers[0]; + break; + } } // create concatlayer @@ -1507,8 +1518,6 @@ void EltwiseSplitOverChannelsPass::run() { concat->outData.push_back(masterEltwise->outData.front()); getCreatorLayer(masterEltwise->outData.front()) = concat; - - // create new eltwise layers - here 2 hardcode for (size_t k = 0; k != totalSplits; k++) { auto eltwiseRaw = std::make_shared( LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); @@ -1517,7 +1526,6 @@ void EltwiseSplitOverChannelsPass::run() { eltwiseRaw->coeff = masterEltwise->coeff; auto eltwise = quantized ? InferenceEngine::injectData(eltwiseRaw) : eltwiseRaw; - eltwise->insData.push_back(splitLayers[0]->outData[k]); eltwise->insData.push_back(splitLayers[1]->outData[k]); getInputTo(splitLayers[0]->outData[k])[eltwise->name] = eltwise; @@ -1529,6 +1537,15 @@ void EltwiseSplitOverChannelsPass::run() { auto data = std::make_shared(l->name + "/elwise/out/" + std::to_string(k), newDesc); getCreatorLayer(data) = eltwise; eltwise->outData.push_back(data); + if (quantized) { + auto eltwiseQuant = InferenceEngine::getInjectedData(eltwise); + if (quantized->_src_quant.IsStatsSet()) { + eltwiseQuant->_src_quant.CopyStats(quantized->_src_quant); + } + if (quantized->_dst_quant.IsStatsSet()) { + eltwiseQuant->_dst_quant.CopyStats(quantized->_dst_quant); + } + } getInputTo(data)[concat->name] = concat; concat->insData.push_back(data); } @@ -1919,13 +1936,20 @@ void FuseFQIntoWeightsPass::run() { } GNAFakeQuantizeLayer gnaFakeQuantizeLayer(fqLayer); - size_t layers_connected_to_fq_count = getInputTo(fqLayer->outData[0]).size(); + auto inputTo = getInputTo(fqLayer->outData[0]); + size_t layers_connected_to_fq_count = inputTo.size(); + auto layerBeforeWeightable = fqLayer; + while (layers_connected_to_fq_count == 1 && LayerInfo(inputTo.begin()->second).isNonFunctional()) { + layerBeforeWeightable = inputTo.begin()->second; + inputTo = getInputTo(layerBeforeWeightable->outData[0]); + layers_connected_to_fq_count = inputTo.size(); + } for (int index = 0; index < layers_connected_to_fq_count; index++) { - auto weightableLayer = CNNNetGetNextLayerSkipCertain(fqLayer, 0, index, isNonFunctional).first; + auto weightableLayer = CNNNetGetNextLayerSkipCertain(layerBeforeWeightable, 0, index, isNonFunctional).first; if (!LayerInfo(weightableLayer).isWeightable()) { continue; } - if (weightableLayer->insData.size() != 3) { + if (weightableLayer->insData.size() < 2) { continue; } @@ -1942,7 +1966,8 @@ void FuseFQIntoWeightsPass::run() { pass_trace() << "found " << LAYER_NAME(fqLayer) << " that will be converted to weights of " << LAYER_NAME(weightableLayer) << "\n"; - auto biases = LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx); + auto biases = weightableLayer->insData.size() == 3 ? 
+ LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr; auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData(); // 1. broke existing connections - by detaching fq subgraph from rest of graph @@ -2149,8 +2174,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { } GNAFakeQuantizeLayer fqLayer(l); auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, donotSkip); - if (prevLayer->outData.size() != 1) { - THROW_GNA_LAYER_EXCEPTION(prevLayer) << " fake quantize input that connected to something else not supported"; + auto prevDataIt = std::find_if(std::begin(prevLayer->outData), std::end(prevLayer->outData), [l](DataPtr data) { + return getInputTo(data).find(l->name) != std::end(getInputTo(data)); + }); + if (prevDataIt == std::end(prevLayer->outData)) { + THROW_GNA_LAYER_EXCEPTION(fqLayer) << "Invalid connection between " << prevLayer->name << " and " << l->name; } auto inputRange = fqLayer.getInputRange(); @@ -2181,8 +2209,18 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { quantParamsPrevLayer->_dst_quant.SetMinValues({ outputRange.first[0] }, false); quantParamsPrevLayer->_dst_quant.SetMaxValues({ outputRange.second[0] }, false); + // Propogate destination statistics to multiply layer if it's set for the next sum/sub layer (is considered as bias) + if (LayerInfo(prevLayer).isEltwiseSum() || LayerInfo(prevLayer).isEltwiseSub()) { + auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, donotSkip); + auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, donotSkip); + if (LayerInfo(eltwPrevLayer).isEltwise() && LayerInfo(constLayer).isConst()) { + auto quantParamsEltwLayer = InferenceEngine::getInjectedData(eltwPrevLayer); + quantParamsEltwLayer->_dst_quant.CopyStats(quantParamsPrevLayer->_dst_quant); + } + } + auto fqQauntParams = InferenceEngine::getInjectedData(l); - fqQauntParams->_dst_quant.SetLevels(fqLevels); + fqQauntParams->_dst_quant.SetLevels(UINT16_MAX); fqQauntParams->_dst_quant.SetMinValues({ inputRange.first[0] }, true); fqQauntParams->_dst_quant.SetMaxValues({ inputRange.second[0] }, true); fqQauntParams->_dst_quant.SetMinValues({ outputRange.first[0] }, false); @@ -2198,7 +2236,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { // FQ Layer is fused only when previous layer is const, memory or activation layer // or a next layer is activation layer. 
bool isFQFuseAllowed = allowFQFuse(l); - auto prevData = prevLayer->outData.front(); + auto prevData = *prevDataIt; // Find all output layers connected to FQ auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip); @@ -2207,7 +2245,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { } if (isFQFuseAllowed) { - getInputTo(prevLayer->outData.front()).clear(); + getInputTo(prevData).clear(); } // Connect all next layers after FQ to the layer that is before FQ @@ -2222,7 +2260,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { for (int insDataIdx : insDatas) { nextLayers[i]->insData[insDataIdx] = prevData; } - getInputTo(prevLayer->outData.front())[nextLayers[i]->name] = nextLayers[i]; + getInputTo(prevData)[nextLayers[i]->name] = nextLayers[i]; } propagateStatistics(quantParamsPrevLayer, nextLayers[i]); diff --git a/inference-engine/src/gna_plugin/serial/headers/2dot7/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/2dot7/gna_model_header.hpp new file mode 100644 index 00000000000..14badf3adcf --- /dev/null +++ b/inference-engine/src/gna_plugin/serial/headers/2dot7/gna_model_header.hpp @@ -0,0 +1,197 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "backend/dnn_types.h" +#include "serial/headers/2dot4/gna_model_header.hpp" +#include "serial/headers/2dot6/gna_model_header.hpp" +#include "serial/headers/latest/gna_model_header.hpp" +#include "gna_data_types.hpp" + +#pragma pack(push, 1) + +namespace GNAPluginNS { +namespace Header2dot7 { + +/** + Maximal number of supported shape dimensions. + */ +#define GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS 8 + +/** + * @brief Header version 2.7 + */ +struct ModelHeader { + /** + *@brief MagicNumber – GNAM in ascii table, equals to hex 0x474e414d + */ + char gnam[4] = {}; + /** + * @brief if header size is not equal to sizeof ModelHeader - some reserved data append in the end of header + * usually it is an indicator of working with version of model different that is current export function produce + */ + uint32_t headerSize = 0u; + struct Version { + /** + * @details Version of format Major – unsigned int, ex: 0x0001 + * every change in the header or in the layers definition should be reflected in version change + * for backward compatibility new parsers can read old versions of model with certain restrictions + */ + uint16_t major = 2u; + /** + * @details Version of Format Minor – unsigned int, corresponding to build revision for example + * changes in minor version are not affected layout of model + */ + uint32_t minor = 7u; + } version; + /** + * @brief Memory required to be allocated using GNAAlloc() + */ + uint64_t gnaMemSize = 0ull; + /** + * @brief Number of GNA Layers + */ + uint64_t layersCount = 0ull; + /** + * @brief Grouping level + * This is depricted field and used for old models only (<=2.6) + */ + uint32_t nGroup = 0u; + + /** + * Convolution related setting - they are affecting input transformation + */ + uint32_t nRotateRows = 0u; + uint32_t nRotateColumns = 0u; + bool doRotateInput = false; + + uint32_t nInputs = 0u; + uint32_t nOutputs = 0u; + + /** + * Convolution related setting - they are affecting output transformation + */ + uint32_t nRotateOutputRows = 0u; + uint32_t nRotateOutputColumns = 0u; + bool doRotateOutput = false; + + uint32_t nTransposeInputs = 0u; + uint32_t nTransposeOutputs = 0u; + + /** + * Reserved Data might be here + */ + ModelHeader() = default; + 
ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) { + gnaMemSize = old.gnaMemSize; + layersCount = old.layersCount; + nGroup = old.nGroup; + nRotateRows = old.nRotateRows; + nRotateColumns = old.nRotateColumns; + nInputs = old.nInputs; + nOutputs = old.nOutputs; + version.minor = old.version.minor; + } + ModelHeader(GNAPluginNS::Header2dot4::ModelHeader const &old) { + gnaMemSize = old.gnaMemSize; + layersCount = old.layersCount; + nGroup = old.nGroup; + nRotateRows = old.nRotateRows; + nRotateColumns = old.nRotateColumns; + nInputs = old.nInputs; + nOutputs = old.nOutputs; + nRotateOutputRows = old.nRotateOutputRows; + nRotateOutputColumns = old.nRotateOutputColumns; + doRotateOutput = old.doRotateOutput; + version.minor = old.version.minor; + } +}; +#pragma pack(pop) + +/* + * In runtime endpoint mostly same as in serial version, except of descriptor field + */ +struct RuntimeEndPoint { + /** + * if scale factor is different then pased into infer , network might need to be requantized + */ + float scaleFactor = 0; + /** + * Pointer descriptor + */ + void* descriptor_ptr = nullptr; + /** + * Endpoint resolution in bytes. + */ + uint32_t element_size = 0; + /** + * Number of elements + */ + uint32_t elements_count = 0; + /** + * Offset in bytes of pointer descriptor + */ + uint64_t descriptor_offset = 0ull; + /** + Shape specifying dimension values. + */ + struct Shape { + /** + Number of dimensions or rank or order. + */ + uint32_t NumberOfDimensions = 0; + /** + array specifying value of each dimension. + Set all zeros for scalars. + */ + uint32_t Dimensions[GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS] = {0}; + } shape; + /** + * Blob layout + */ + uint8_t layout = InferenceEngine::Layout::NC; + /** + * Blob precision + */ + uint8_t precision = InferenceEngine::Precision::FP32; + + intel_dnn_orientation_t orientation = kDnnUnknownOrientation; + + RuntimeEndPoint() = default; + RuntimeEndPoint(const GNAPluginNS::Header2dot6::RuntimeEndPoint &old, uint32_t ngroup) { + scaleFactor = old.scaleFactor; + descriptor_ptr = old.descriptor_ptr; + element_size = old.element_size; + elements_count = old.elements_count; + orientation = old.orientation; + layout = InferenceEngine::Layout::NC; + precision = InferenceEngine::Precision::FP32; + descriptor_offset = old.descriptor_offset; + InferenceEngine::SizeVector dims = {ngroup, elements_count / ngroup}; + shape.NumberOfDimensions = static_cast(dims.size()); + for (auto i = 0; i < dims.size(); i++) { + shape.Dimensions[i] = dims[i]; + } + } + RuntimeEndPoint(double scaleFactor, + void* descriptor_ptr, + uint32_t element_size, + uint32_t elements_count, + Shape shape, + uint8_t layout, + uint8_t precision, + intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor), + descriptor_ptr(descriptor_ptr), + element_size(element_size), + elements_count(elements_count), + shape(shape), + layout(layout), + precision(precision), + orientation(orientation) { } +}; +} // namespace Header2dot7 +} // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp b/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp index 89292ab88af..7ec27b2caed 100644 --- a/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp +++ b/inference-engine/src/gna_plugin/serial/headers/latest/gna_model_header.hpp @@ -4,11 +4,11 @@ #pragma once -#include "serial/headers/2dot6/gna_model_header.hpp" +#include "serial/headers/2dot7/gna_model_header.hpp" namespace GNAPluginNS { namespace 
HeaderLatest { -using ModelHeader = GNAPluginNS::Header2dot6::ModelHeader; -using RuntimeEndPoint = GNAPluginNS::Header2dot6::RuntimeEndPoint; +using ModelHeader = GNAPluginNS::Header2dot7::ModelHeader; +using RuntimeEndPoint = GNAPluginNS::Header2dot7::RuntimeEndPoint; } } diff --git a/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp new file mode 100644 index 00000000000..da7e6279624 --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.cpp @@ -0,0 +1,180 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/convert_matmul_to_pointwise_convolution.hpp" + +#include +#include +#include + +#include "layers/gna_permute.hpp" +#include "backend/gna_limitations.hpp" + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(ConvertMatmulToPointWiseConvolution, "ConvertMatmulToPointWiseConvolution", 0); +NGRAPH_RTTI_DEFINITION(ConvertMatmulWithBiasToPointWiseConvolution, "ConvertMatmulWithBiasToPointWiseConvolution", 0); +NGRAPH_RTTI_DEFINITION(ConvertMatmulWithFqToPointWiseConvolution, "ConvertMatmulWithFqToPointWiseConvolution", 0); + +static std::tuple VerifyAndGetConvParams(std::shared_ptr matmul_node) { + auto input1_shape = matmul_node->get_input_shape(0); + auto input2_shape = matmul_node->get_input_shape(1); + auto output_shape = matmul_node->get_output_shape(0); + if (input1_shape.size() == 3 && input1_shape.front() == 1) { + input1_shape.erase(std::begin(input1_shape)); + } + + if (input1_shape.size() != 2 || input2_shape.size() != 2 || output_shape.size() < 2) { + return std::make_tuple(false, 0, 0, 0); + } + + // Check if MatMul or corresponding pointwise convolution are supported by GNA + const uint32_t width = input1_shape.front(); + const uint32_t in_channels = input2_shape.back(); + const uint32_t out_channels = input2_shape.front(); + if (input1_shape.front() <= GNALimitations::affineMaxBatchSize || + out_channels % GNALimitations::convFiltersNumDivider != 0 || + out_channels > GNALimitations::convMaxFiltersNum || + in_channels > GNALimitations::convFilterMaxSize) { + return std::make_tuple(false, 0, 0, 0); + } + + return std::make_tuple(true, width, in_channels, out_channels); +} + +static bool Convert(std::shared_ptr matmul_node, + std::shared_ptr add, + std::shared_ptr bias, + std::shared_ptr fq) { + bool supported; + uint32_t width, in_channels, out_channels; + std::tie(supported, width, in_channels, out_channels) = VerifyAndGetConvParams(matmul_node); + if (!supported) return false; + + auto input_node = matmul_node->input_value(0).get_node_shared_ptr(); + auto weights_node = matmul_node->input_value(1).get_node_shared_ptr(); + auto base_name = matmul_node->get_friendly_name(); + + auto reshape_const_before = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{4}, + ngraph::Shape{1, 1, width, in_channels}); + auto reshape_before = std::make_shared(input_node, reshape_const_before, false); + reshape_before->set_friendly_name(base_name + "/reshape_in"); + + auto transpose_before = std::make_shared(reshape_before, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW))); + transpose_before->set_friendly_name(base_name + "/transpose_in"); + + auto weights_reshape_const = std::make_shared(ngraph::element::Type_t::i64, + 
ngraph::Shape{4}, ngraph::Shape{out_channels, in_channels, 1, 1}); + auto weights_reshaped = std::make_shared(weights_node, weights_reshape_const, false); + + std::shared_ptr conv_node = std::make_shared(transpose_before, weights_reshaped, + ngraph::Strides{1, 1}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, + ngraph::Strides{1, 1}, ngraph::op::PadType::VALID); + conv_node->set_friendly_name(base_name + "/conv"); + + std::shared_ptr root_node = matmul_node; + if (bias != nullptr) { + conv_node = std::make_shared(conv_node, bias); + root_node = add; + } + + if (fq != nullptr) { + conv_node = fq->clone_with_new_inputs({conv_node, fq->input_value(1), fq->input_value(2), + fq->input_value(3), fq->input_value(4)}); + root_node = fq; + } + + auto transpose_after = std::make_shared(conv_node, + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC))); + transpose_after->set_friendly_name(base_name + "/transpose_out"); + + auto output_shape = matmul_node->get_output_shape(0); + output_shape[output_shape.size() - 1] = out_channels; + output_shape[output_shape.size() - 2] = width; + auto reshape_const_after = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{output_shape.size()}, + output_shape); + auto reshape_after = std::make_shared(transpose_after, reshape_const_after, false); + reshape_after->set_friendly_name(base_name); + + ngraph::replace_node(root_node, reshape_after); + return true; +} + +ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() { + auto const_input = ngraph::pattern::wrap_type(); + auto const_fq = ngraph::pattern::wrap_type({const_input, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + auto second_input = std::make_shared(ngraph::OutputVector{const_input, const_fq}); + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), second_input}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(matmul).get_node_shared_ptr(), nullptr, nullptr, nullptr); + }; + + auto m = std::make_shared(matmul, "ConvertMatmulToPointWiseConvolution"); + this->register_matcher(m, callback); +} + +ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseConvolution() { + auto const_input = ngraph::pattern::wrap_type(); + auto const_fq = ngraph::pattern::wrap_type({const_input, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + auto second_input = std::make_shared(ngraph::OutputVector{const_input, const_fq}); + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), second_input}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({matmul, bias}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(matmul).get_node_shared_ptr(), pattern_map.at(add).get_node_shared_ptr(), + pattern_map.at(bias).get_node_shared_ptr(), nullptr); + }; + + auto m = std::make_shared(add, "ConvertMatmulWithBiasToPointWiseConvolution"); + this->register_matcher(m, callback); +} + +ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolution() { + auto const_input = 
ngraph::pattern::wrap_type(); + auto const_fq = ngraph::pattern::wrap_type({const_input, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + auto second_input = std::make_shared(ngraph::OutputVector{const_input, const_fq}); + auto matmul = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), second_input}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({matmul, bias}); + auto matmul_out = std::make_shared(ngraph::OutputVector{add, matmul}); + auto out_fq = ngraph::pattern::wrap_type({matmul_out, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto add_it = pattern_map.find(add); + auto add_node = (add_it == std::end(pattern_map) ? nullptr : add_it->second.get_node_shared_ptr()); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr()); + return Convert(pattern_map.at(matmul).get_node_shared_ptr(), add_node, bias_node, + pattern_map.at(out_fq).get_node_shared_ptr()); + }; + + auto m = std::make_shared(out_fq, "ConvertMatmulWithFqToPointWiseConvolution"); + this->register_matcher(m, callback); +} \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.hpp b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.hpp new file mode 100644 index 00000000000..999b529194d --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/convert_matmul_to_pointwise_convolution.hpp @@ -0,0 +1,71 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +/** + * @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout + * with transposes around it: + * Transose (NHWC -> NCHW) + * | + * Matmul Convolution in NHWC layout + * Input1: [A, B] B > 8 -------> Input: [1, 1, A, B] + * Input2: [B, C] Kernel: [C, B, 1, 1] + * Output: [A, C] Output: [1, 1, A, C] + * | + * Transose (NCHW -> NHWC) + */ +class ConvertMatmulToPointWiseConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatmulToPointWiseConvolution(); +}; + +/** + * @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout + * with transposes around it, moved add with bias before the last transpose: + * Transose (NHWC -> NCHW) + * | + * Matmul Convolution in NHWC layout + * Input1: [A, B] B > 8 -------> Input: [1, 1, A, B] + * Input2: [B, C] Kernel: [C, B, 1, 1] + * Output: [A, C] Output: [1, 1, A, C] + * | | + * Add (const) Add (const) + * | + * Transose (NCHW -> NHWC) + */ +class ConvertMatmulWithBiasToPointWiseConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatmulWithBiasToPointWiseConvolution(); +}; + +/** + * @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout + * with transposes around it, moved add with bias and/or fake quantize before the last transpose: + * Transose (NHWC -> NCHW) + * | + * Matmul Convolution in NHWC layout + * Input1: [A, B] B > 8 -------> Input: [1, 1, A, B] + * Input2: [B, C] 
Kernel: [C, B, 1, 1] + * Output: [A, C] Output: [1, 1, A, C] + * | | + * Add (const) Add (const) + * | | + * FakeQuantize FakeQuantize + * | + * Transose (NCHW -> NHWC) + */ +class ConvertMatmulWithFqToPointWiseConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertMatmulWithFqToPointWiseConvolution(); +}; + +} // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp new file mode 100644 index 00000000000..a9d79c831ab --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.cpp @@ -0,0 +1,131 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/split_convolution_with_large_buffer_size.hpp" + +#include + +#include +#include +#include + +#include "backend/gna_limitations.hpp" + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0); +NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); +NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); + +static std::vector GetConvSplitSizes(std::shared_ptr conv) { + uint32_t width = conv->get_input_shape(0).back(); + uint32_t in_channels = conv->get_input_shape(0).at(1); + uint32_t usedWidth = 0; + std::vector split_sizes; + uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels; + width_max_size = width_max_size - width_max_size % 64; + while (usedWidth < width) { + uint32_t width_part = std::min(width - usedWidth, width_max_size); + split_sizes.push_back(width_part); + usedWidth += width_part; + } + IE_ASSERT(usedWidth == width); + return split_sizes; +} + +static bool Convert(std::shared_ptr conv, + std::shared_ptr add, + std::shared_ptr bias, + std::shared_ptr fq) { + auto input_size = std::accumulate(std::begin(conv->get_input_shape(0)), + std::end(conv->get_input_shape(0)), 1, std::multiplies()); + if (input_size <= GNALimitations::bufferMaxSize) { + return false; + } + + auto split_sizes = GetConvSplitSizes(conv); + IE_ASSERT(split_sizes.size() > 1); + + /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, + otherwise this split axis isn't supported */ + const int64_t width_axis = conv->get_input_shape(0).size() - 1; + auto split_node = std::make_shared(conv->input_value(0), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector{width_axis}), + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); + split_node->set_friendly_name(conv->get_friendly_name() + "/split"); + ngraph::OutputVector convOutputs; + std::shared_ptr root_node = fq ? fq : (add ? 
add : conv); + for (int i = 0; i < split_sizes.size(); ++i) { + std::shared_ptr output = conv->clone_with_new_inputs({split_node->output(i), conv->input_value(1)}); + output->set_friendly_name(conv->get_friendly_name() + "_" + std::to_string(i)); + if (bias) { + output = std::make_shared(output, bias); + } + + if (fq) { + output = fq->clone_with_new_inputs({output, fq->input_value(1), fq->input_value(2), + fq->input_value(3), fq->input_value(4)}); + } + convOutputs.push_back(output); + } + + auto concat = std::make_shared(convOutputs, width_axis); + concat->set_friendly_name(conv->get_friendly_name()); + ngraph::replace_node(root_node, concat); + return true; +} + +SplitConvolution::SplitConvolution() { + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr); + }; + + auto m = std::make_shared(conv, "SplitConvolution"); + this->register_matcher(m, callback); +} + +SplitConvolutionWithBias::SplitConvolutionWithBias() { + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({conv, bias}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + return Convert(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(add).get_node_shared_ptr(), + pattern_map.at(bias).get_node_shared_ptr(), nullptr); + }; + + auto m = std::make_shared(add, "SplitConvolutionWithBias"); + this->register_matcher(m, callback); +} + +SplitConvolutionWithFq::SplitConvolutionWithFq() { + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input()}); + auto bias = ngraph::pattern::wrap_type(); + auto add = ngraph::pattern::wrap_type({conv, bias}); + auto conv_output = std::make_shared(ngraph::OutputVector{conv, add}); + auto out_fq = ngraph::pattern::wrap_type({conv_output, + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + auto add_it = pattern_map.find(add); + auto add_node = (add_it == std::end(pattern_map) ? nullptr : add_it->second.get_node_shared_ptr()); + auto bias_it = pattern_map.find(bias); + auto bias_node = (bias_it == std::end(pattern_map) ? 
nullptr : bias_it->second.get_node_shared_ptr()); + return Convert(pattern_map.at(conv).get_node_shared_ptr(), add_node, bias_node, pattern_map.at(out_fq).get_node_shared_ptr()); + }; + + auto m = std::make_shared(out_fq, "SplitConvolutionWithFq"); + this->register_matcher(m, callback); +} \ No newline at end of file diff --git a/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.hpp b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.hpp new file mode 100644 index 00000000000..8667f4273bf --- /dev/null +++ b/inference-engine/src/gna_plugin/transformations/split_convolution_with_large_buffer_size.hpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +// @brief Splits convolution with large input buffer +class SplitConvolution : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SplitConvolution(); +}; + +// @brief Splits convolution with large input buffer, move add with bias to each convolution before concat +class SplitConvolutionWithBias : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SplitConvolutionWithBias(); +}; + +/* @brief Splits convolution with large input buffer, + * move add with bias and/or fake quantize to each convolution before concat + */ +class SplitConvolutionWithFq : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + SplitConvolutionWithFq(); +}; + +} // namespace GNAPluginNS \ No newline at end of file diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp index 9f0135aa25e..994ba866f7a 100644 --- a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp +++ b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp @@ -312,6 +312,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo struct Subgraph { ngraph::ResultVector _results; ngraph::ParameterVector _parameters; + ngraph::SinkVector _sinks; std::string _affinity; }; std::unordered_map subgraphs; @@ -325,6 +326,9 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo } else if (ngraph::op::is_parameter(node)) { subgraph._parameters.emplace_back( std::dynamic_pointer_cast(node->shared_from_this())); + } else if (ngraph::op::is_sink(node)) { + subgraph._sinks.emplace_back( + std::dynamic_pointer_cast(node->shared_from_this())); } auto itAffinity = affinities.find(node); if (itAffinity != affinities.end()) { @@ -373,7 +377,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo for (auto&& subgraph : orderedSubgraphs) { _networks[id]._device = subgraph._affinity; subFunctions[id] = - std::make_shared(subgraph._results, subgraph._parameters, + std::make_shared(subgraph._results, subgraph._sinks, subgraph._parameters, _name + '_' + std::to_string(id)); _networks[id]._clonedNetwork = CNNNetwork{subFunctions[id]}; // update of pre-processing info diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp new file mode 100644 index 00000000000..1ec1ffe628e --- /dev/null +++ b/inference-engine/src/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp @@ 
-0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + + class TRANSFORMATIONS_API GatherNegativeConstIndicesNormalize; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief GatherNegativeConstIndicesNormalize checks if the indices value is a negative scalar and + * normalizes it using a ShapeOf->Add->Cast subgraph. + * This transformation should be removed after support for negative indices is added in a + * future version of the Gather operation. + */ +class ngraph::pass::GatherNegativeConstIndicesNormalize : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + GatherNegativeConstIndicesNormalize(); +}; diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 79f1dee8882..4ab5cf1e80d 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -70,6 +70,7 @@ #include "transformations/op_conversions/log_softmax_decomposition.hpp" #include "transformations/op_conversions/mvn6_decomposition.hpp" #include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp" +#include "transformations/op_conversions/gather_normalize_negative_indices.hpp" #include #include @@ -157,6 +158,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptradd_matcher(); decomp->add_matcher(); decomp->add_matcher(); + decomp->add_matcher(); decomp->set_name("ngraph::pass::CommonDecompositions"); // CF is required after all decompositions diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp new file mode 100644 index 00000000000..86713451869 --- /dev/null +++ b/inference-engine/src/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/gather_normalize_negative_indices.hpp" + +#include + +#include +#include +#include +#include "itt.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::GatherNegativeConstIndicesNormalize, "GatherNegativeConstIndicesNormalize", 0); + +ngraph::pass::GatherNegativeConstIndicesNormalize::GatherNegativeConstIndicesNormalize() { + MATCHER_SCOPE(GatherNegativeConstIndicesNormalize); + auto data_input = ngraph::pattern::any_input(pattern::has_static_rank()); + auto axis_input = ngraph::pattern::wrap_type(); + auto indices_input = ngraph::pattern::wrap_type(); + auto gather_node = std::make_shared(data_input, indices_input, axis_input); + + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto gather = std::dynamic_pointer_cast(pattern_to_output.at(gather_node).get_node_shared_ptr()); + auto data = pattern_to_output.at(data_input); + auto axis_constant = std::dynamic_pointer_cast(pattern_to_output.at(axis_input).get_node_shared_ptr()); + auto indices_constant =
std::dynamic_pointer_cast(pattern_to_output.at(indices_input).get_node_shared_ptr()); + + if (!gather || !axis_constant || !indices_constant) { + return false; + } + + auto indices = indices_constant->cast_vector(); + if (indices.size() != 1 || indices[0] >= 0) { + return false; + } + + auto axis = axis_constant->cast_vector(); + if (axis.size() != 1) { + return false; + } + + auto axis_value = axis[0]; + + // normalize `axis` value if it is negative + if (axis_value < 0) { + axis_value = axis_value + data.get_partial_shape().rank().get_length(); + } + + if (data.get_partial_shape().rank().get_length() < axis_value) { + return false; + } + + // check `axis` dimension of data tensor is static + if (!data.get_partial_shape()[axis_value].is_static()) { + return false; + } + + auto input_type = indices_constant->get_element_type(); + auto shape_of = std::make_shared(data, input_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(input_type, Shape{}, {axis_value}), ngraph::opset7::Constant::create(input_type, Shape{}, {0})); + + auto add = std::make_shared(input_gather, indices_constant); + auto gather_new = gather_node->copy_with_new_inputs({data, add, axis_constant}); + gather_new->set_friendly_name(gather->get_friendly_name()); + + ngraph::copy_runtime_info(gather, {shape_of, input_gather, add, gather_new}); + ngraph::replace_node(gather, gather_new); + + return true; + }; + + auto m = std::make_shared(gather_node, matcher_name); + register_matcher(m, callback); +} diff --git a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp index 213e06ee1f5..27e703ec4f5 100644 --- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp @@ -92,7 +92,7 @@ bool checkGrowingOutput(const Model& model) { return false; } - static const float SCALE_THRESHOLD = 0.125f; + static const float SCALE_THRESHOLD = 0.1f; for (const auto& stage : model->getStages()) { if (stage->type() != StageType::Power && @@ -248,14 +248,13 @@ void PassImpl::run(const Model& model) { if (firstStage && shift < 4 && isGrowingOutput && weights->desc().dim(Dim::C) > 1) { normalVal = 5; } - shift = correctShift(shift, firstStage, stage->origLayer()->type); shift -= normalVal; } firstStage = false; scale = 1; - if (shift > scaleThreshold) { + if (shift >= scaleThreshold) { scale = static_cast(1ULL << static_cast(shift)); } diff --git a/inference-engine/tests/functional/inference_engine/transformations/gather_normalize_negative_indices_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/gather_normalize_negative_indices_test.cpp new file mode 100644 index 00000000000..ec6c4204a9b --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/gather_normalize_negative_indices_test.cpp @@ -0,0 +1,306 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +TEST(TransformationTests, GatherNegativeIndicesNormalize) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, 
ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_neg_axis) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_dif_input_types) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + 
+ auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 15, 128}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_static_axis_dim) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_static_axis_dim_neg_axis) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2}); + + auto shape_of = std::make_shared(data, indices_type); + auto input_gather = std::make_shared(shape_of, + 
ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {1}), ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {0})); + auto add = std::make_shared(input_gather, indices); + auto gather = std::make_shared(data, add, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_non_static_axis_dim) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, DYN, DYN}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto indices_type = ngraph::element::i32; + + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape{DYN, DYN, DYN}); + auto indices = ngraph::opset7::Constant::create(indices_type, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + + auto gather = std::make_shared(data, indices, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_positive_ind) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{2, 3}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{2, 3}); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, GatherNegativeIndicesNormalize_non_static_rank) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(ngraph::Rank::dynamic())); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis, 0); + + f = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + 
manager.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic()); + auto indices = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1}); + auto axis = ngraph::opset7::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0}); + + auto gather = std::make_shared(data, indices, axis); + + f_ref = std::make_shared(ngraph::NodeVector{gather}, ngraph::ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp index 9ab20c3eda4..062ea0cad91 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/memory.cpp @@ -38,7 +38,7 @@ INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest, ::testing::ValuesIn(iterationCount), ::testing::ValuesIn(inShapes), ::testing::ValuesIn(inputPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(CommonTestUtils::DEVICE_CPU, "HETERO:CPU")), MemoryTest::getTestCaseName); } // namespace diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp new file mode 100644 index 00000000000..7e3d15174f3 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/convert_matmul_to_pointwise_conv.cpp @@ -0,0 +1,230 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector // Input Shape +> convertMatmulToPointwiseConvParams; + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector, // Input Shape + std::pair // Input Min and Max +> convertMatmulToPointwiseConvWithFqParams; + +namespace LayerTestsDefinitions { + +class ConvertMatmulToPointwiseConv : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); + return result.str(); + } + + InferenceEngine::Blob::Ptr 
GenerateInput(const InferenceEngine::InputInfo& info) const { + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(info.getTensorDesc()); + blob->allocate(); + + auto* rawBlobDataPtr = blob->buffer().as(); + std::vector values = CommonTestUtils::generate_float_numbers(blob->size(), -0.2f, 0.2f); + for (size_t i = 0; i < blob->size(); i++) { + rawBlobDataPtr[i] = values[i]; + } + return blob; + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + + size_t batch = inputShape[inputShape.size() - 2]; + size_t elemNum = inputShape[inputShape.size() - 1]; + std::vector weights = CommonTestUtils::generate_float_numbers(elemNum * elemNum, -0.1f, 0.1f); + auto weightsNode = std::make_shared(ngPrc, ngraph::Shape{elemNum, elemNum}, weights); + auto matmul = ngraph::builder::makeMatMul(params[0], weightsNode, false, true); + + auto bias = ngraph::builder::makeConstant(ngPrc, std::vector{1, batch, 1}, std::vector{1.0f}); + auto add = ngraph::builder::makeEltwise(matmul, bias, ngraph::helpers::EltwiseTypes::ADD); + + auto pattern = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{ inputShape.size() }, inputShape); + auto reshape = std::make_shared(matmul, pattern, false); + auto relu = std::make_shared(reshape); + + ngraph::ResultVector results{ std::make_shared(relu)}; + function = std::make_shared(results, params, "ConvertMatmulToPointwiseConv"); + } +}; + +class ConvertMatmulToPointwiseConvWithFq : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { + float inputDataMin = -10.0f; + float inputDataMax = 10.0f; + float inputDataResolution = 1.0f; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::vector inputShape; + std::pair inputMinMax; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); + result << "_inputMinMax=(" << inputMinMax.first << ".." 
<< inputMinMax.second << ")"; + return result.str(); + } + + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, + 1 / inputDataResolution); + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::pair inputMinMax; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax) = this->GetParam(); + std::tie(inputDataMin, inputDataMax) = inputMinMax; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + + auto inputLowNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ inputDataMin }); + auto inputHighNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ inputDataMax }); + auto inputFQ = std::make_shared(params[0], + inputLowNode, inputHighNode, inputLowNode, inputHighNode, UINT16_MAX); + + size_t elemNum = inputShape[inputShape.size() - 1]; + + const float weightsMin = -0.2f; + const float weightsMax = 0.2f; + std::vector weights = CommonTestUtils::generate_float_numbers(elemNum * elemNum, weightsMin, weightsMax); + auto weightsNode = std::make_shared(ngPrc, ngraph::Shape{elemNum, elemNum}, weights); + auto weightsLowNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ weightsMin }); + auto weightsHighNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ weightsMax }); + auto weightsFQNode = std::make_shared(weightsNode, + weightsLowNode, weightsHighNode, weightsLowNode, weightsHighNode, UINT16_MAX); + auto matmul = ngraph::builder::makeMatMul(inputFQ, weightsFQNode, false, true); + + auto bias = ngraph::builder::makeConstant(ngPrc, std::vector{1, 1, 1}, std::vector{1.0f}); + auto add = ngraph::builder::makeEltwise(matmul, bias, ngraph::helpers::EltwiseTypes::ADD); + + auto outputLowNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ -inputDataMax * weightsMax * elemNum }); + auto outputHighNode = ngraph::builder::makeConstant(ngPrc, std::vector{ 1 }, + std::vector{ inputDataMax * weightsMax * elemNum }); + auto outputFQ = std::make_shared(add, + outputLowNode, outputHighNode, outputLowNode, outputHighNode, UINT16_MAX); + + auto pattern = std::make_shared(ngraph::element::Type_t::i64, + ngraph::Shape{ inputShape.size() }, inputShape); + auto reshape = std::make_shared(outputFQ, pattern, false); + + auto relu = std::make_shared(reshape); + + ngraph::ResultVector results{ std::make_shared(relu)}; + function = std::make_shared(results, params, "ConvertMatmulToPointwiseConv"); + } +}; + +TEST_P(ConvertMatmulToPointwiseConv, CompareWithRefImpl) { + Run(); +}; + +TEST_P(ConvertMatmulToPointwiseConvWithFq, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> configs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + } +}; + +const std::vector> inputShape = { + {1, 64, 64}, + {1, 256, 128}, + {1, 512, 128} +}; + +const std::vector> fqStats = { + {-0.5, 0.5} +}; + +INSTANTIATE_TEST_CASE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConv, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape)), + 
ConvertMatmulToPointwiseConv::getTestCaseName); + +// Issue 55662 +INSTANTIATE_TEST_CASE_P(DISABLED_smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(fqStats)), + ConvertMatmulToPointwiseConvWithFq::getTestCaseName); + +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp index 17da73dfc99..f4c6cc98d34 100644 --- a/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/eltwise_split_over_channels_pass.cpp @@ -18,19 +18,21 @@ typedef std::tuple< InferenceEngine::Precision, // Network Precision std::string, // Target Device - std::map //Configuration + std::map, // Configuration + std::vector // Input Shape > EltwiseSplitOverChannelsPassParams; namespace LayerTestsDefinitions { class EltwiseSplitOverChannelsPassTest : public testing::WithParamInterface, - public LayerTestsUtils::LayerTestsCommon { + public LayerTestsUtils::LayerTestsCommon { public: static std::string getTestCaseName(testing::TestParamInfo obj) { InferenceEngine::Precision netPrecision; std::string targetDevice; std::map configuration; - std::tie(netPrecision, targetDevice, configuration) = obj.param; + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param; std::ostringstream result; result << "netPRC=" << netPrecision.name() << "_"; @@ -38,20 +40,22 @@ public: for (auto const& configItem : configuration) { result << "_configItem=" << configItem.first << "_" << configItem.second; } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); return result.str(); } protected: void SetUp() override { InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, configuration) = this->GetParam(); + std::vector inputShape; + std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, { {1, 67000} }); - auto const_mult2 = ngraph::builder::makeConstant(ngPrc, {1, 67000}, {-1.0f}); + auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); + auto const_mult2 = ngraph::builder::makeConstant(ngPrc, inputShape, {-1.0f}); auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY); - function = std::make_shared(sum, params, "RemovePermutationPass"); + function = std::make_shared(sum, params, "EltwiseSplitOverChannelsPassTest"); } }; @@ -71,11 +75,17 @@ const std::vector> configs = { } }; +const std::vector> inputShape = { + {1, 67000}, + {1, 500000} +}; + INSTANTIATE_TEST_CASE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), - ::testing::ValuesIn(configs)), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape)), EltwiseSplitOverChannelsPassTest::getTestCaseName); } // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp 
b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp index cb4cc459a95..a59ad83eaed 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/skip_tests_config.cpp @@ -60,8 +60,6 @@ std::vector disabledTestPatterns() { R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)", // TODO: Issue 51528 R"(.*CachingSupport.*_(u8|i16)_.*)", - // TODO: Issue 51527 - R"(.*CachingSupport.*_batch2_.*)", // TODO: Issue 51525 R"(.*CachingSupport.*KSOFunction.*)", // TODO: Issue 57363 (Param -> Result subgraphs) diff --git a/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp b/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp index 0db7264cb74..c30945dc914 100644 --- a/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp +++ b/inference-engine/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp @@ -69,13 +69,16 @@ void ImportNetworkTestBase::Run() { for (const auto& next_input : importedExecNetwork.GetInputsInfo()) { ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]); + Compare(next_input.second->getTensorDesc(), compiledExecNetwork.GetInputsInfo()[next_input.first]->getTensorDesc()); } for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) { ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]); } auto importedOutputs = GetOutputs(); ASSERT_EQ(actualOutputs.size(), importedOutputs.size()); + for (size_t i = 0; i < actualOutputs.size(); i++) { + Compare(actualOutputs[i]->getTensorDesc(), importedOutputs[i]->getTensorDesc()); Compare(actualOutputs[i], importedOutputs[i]); } } diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp index 9b8b78b6ef0..9d132515743 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp @@ -72,6 +72,8 @@ public: virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual); + virtual void Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc); + virtual void SetRefMode(RefMode mode); std::shared_ptr GetFunction(); diff --git a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index cc3927b25c5..056826aff86 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -274,6 +274,17 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const } } +void LayerTestsCommon::Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc) { + auto expectedDims = actualDesc.getDims(); + auto actualDims = expectedDesc.getDims(); + ASSERT_EQ(actualDims.size(), expectedDims.size()); + for (size_t j = 0; j < actualDims.size(); ++j) { + 
ASSERT_EQ(actualDims.at(j), expectedDims.at(j)); + } + ASSERT_EQ(actualDesc.getLayout(), expectedDesc.getLayout()); + ASSERT_EQ(actualDesc.getPrecision(), expectedDesc.getPrecision()); +} + void LayerTestsCommon::ConfigureNetwork() { for (const auto &in : cnnNetwork.getInputsInfo()) { if (inLayout != InferenceEngine::Layout::ANY) { diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index ef988cb12da..97eacdf9ae1 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -1176,9 +1176,6 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { size_t opt_deconv_layers_b_fs_zyx_fsv16 = 0; size_t total_crop_layers = 0; - size_t weighted_sum_feature_size = 0; - size_t weight_sum = 0; - for (auto& node : get_processing_order()) { auto &prim = *node; if (prim.type() == cldnn::convolution::type_id()) { @@ -1324,35 +1321,4 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { if (should_use_bs_fs_yx_bsv16_fsv16) lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1); - - - // This is to avoid using fsv16 for shallow-feature networks. - // This may not be exactly same as real execution graph as layer fusing is not done yet, - // but it is a reasonable approximation. - // Check the expected network efficiency after setting layer optimization attributes. - // If network depth is shallow, it is faster with fsv4. - for (auto& node : get_processing_order()) { - auto &prim = *node; - - if (prim.is_in_data_flow() && prim.type() == cldnn::convolution::type_id()) { - size_t num_feature = prim.get_output_layout().size.feature.vector()[0]; - size_t num_spatial = 1; - for (auto s : prim.get_output_layout().size.spatial.vector()) - num_spatial *= s; - - if (lo.get_preferred_format(prim) != format::b_fs_yx_fsv4) { - weight_sum += num_spatial; - weighted_sum_feature_size += num_spatial * num_feature; - } - } - } - - size_t weighted_average_feature_depth = weighted_sum_feature_size / std::max(weight_sum, static_cast(1)); - - // Need to confirm that weighted_average_feature_depth > 1 to keep unittest behavior. 
- if (is_quantized_int8_model && weighted_average_feature_depth < 8 && weighted_average_feature_depth > 1) { - lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::fs_b_yx_fsv32_network, 0); - lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::b_fs_yx_fsv16_network, 0); - lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 0); - } } diff --git a/ngraph/core/include/ngraph/op/util/op_types.hpp b/ngraph/core/include/ngraph/op/util/op_types.hpp index b672f5518c4..6d162157ab5 100644 --- a/ngraph/core/include/ngraph/op/util/op_types.hpp +++ b/ngraph/core/include/ngraph/op/util/op_types.hpp @@ -34,6 +34,8 @@ namespace ngraph NGRAPH_API bool is_output(const ngraph::Node* node); NGRAPH_API + bool is_sink(const ngraph::Node* node); + NGRAPH_API bool is_constant(const ngraph::Node* node); NGRAPH_API bool is_commutative(const ngraph::Node* node); @@ -60,6 +62,8 @@ namespace ngraph NGRAPH_API bool is_output(const std::shared_ptr& node); NGRAPH_API + bool is_sink(const std::shared_ptr& node); + NGRAPH_API bool is_constant(const std::shared_ptr& node); NGRAPH_API bool is_commutative(const std::shared_ptr& node); diff --git a/ngraph/core/src/op/util/op_types.cpp b/ngraph/core/src/op/util/op_types.cpp index f0852233ec7..354c605ced1 100644 --- a/ngraph/core/src/op/util/op_types.cpp +++ b/ngraph/core/src/op/util/op_types.cpp @@ -76,6 +76,11 @@ bool ngraph::op::is_output(const ngraph::Node* node) return dynamic_cast(node) != nullptr; } +bool ngraph::op::is_sink(const ngraph::Node* node) +{ + return dynamic_cast(node) != nullptr; +} + bool ngraph::op::is_constant(const ngraph::Node* node) { return dynamic_cast(node) != nullptr; @@ -134,6 +139,10 @@ bool ngraph::op::is_output(const std::shared_ptr& node) { return is_output(node.get()); } +bool ngraph::op::is_sink(const std::shared_ptr& node) +{ + return is_sink(node.get()); +} bool ngraph::op::is_constant(const std::shared_ptr& node) { return is_constant(node.get()); diff --git a/ngraph/python/tox.ini b/ngraph/python/tox.ini index e0ccc85785e..de7bb8337b2 100644 --- a/ngraph/python/tox.ini +++ b/ngraph/python/tox.ini @@ -7,7 +7,7 @@ skip_install=True deps = -rrequirements.txt -rrequirements_test.txt - mypy + mypy<0.900 flake8-bugbear pytest-xdist setenv = diff --git a/ngraph/test/CMakeLists.txt b/ngraph/test/CMakeLists.txt index 110d57c8b1d..eb6d83f0d70 100644 --- a/ngraph/test/CMakeLists.txt +++ b/ngraph/test/CMakeLists.txt @@ -288,7 +288,7 @@ set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS if (ENABLE_MKL_DNN) message(STATUS "NGRAPH_TESTS: IE:CPU enabled") set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU") - if (NOT ENABLE_STRICT_DEPENDENCIES) + if (ENABLE_STRICT_DEPENDENCIES) # For convinience add a runtime dependency to build along with this target. # Warning: Parallel build with -GNinja may not be efficient. list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin) @@ -298,7 +298,7 @@ endif() if (ENABLE_CLDNN) message(STATUS "NGRAPH_TESTS: IE:GPU enabled") set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU") - if (NOT ENABLE_STRICT_DEPENDENCIES) + if (ENABLE_STRICT_DEPENDENCIES) # For convinience add a runtime dependency to build along with this target. # Warning: Parallel build with -GNinja may not be efficient. list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin)