Merge remote-tracking branch 'upstream/master' into revise_negative
This commit is contained in:
commit
05226050eb
@ -112,6 +112,7 @@ jobs:
|
|||||||
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
|
-DNGRAPH_ONNX_IMPORT_ENABLE=ON
|
||||||
-DNGRAPH_ONNX_EDITOR_ENABLE=ON
|
-DNGRAPH_ONNX_EDITOR_ENABLE=ON
|
||||||
-DENABLE_FASTER_BUILD=ON
|
-DENABLE_FASTER_BUILD=ON
|
||||||
|
-DENABLE_STRICT_DEPENDENCIES=OFF
|
||||||
-DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
|
-DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules
|
||||||
$(REPO_DIR)
|
$(REPO_DIR)
|
||||||
workingDirectory: $(BUILD_DIR)
|
workingDirectory: $(BUILD_DIR)
|
||||||
|
@ -90,7 +90,7 @@ jobs:
|
|||||||
# Disable errors with Ninja
|
# Disable errors with Ninja
|
||||||
export CXXFLAGS="-Wno-error=unused-command-line-argument"
|
export CXXFLAGS="-Wno-error=unused-command-line-argument"
|
||||||
export CFLAGS="-Wno-error=unused-command-line-argument"
|
export CFLAGS="-Wno-error=unused-command-line-argument"
|
||||||
cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR)
|
cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR)
|
||||||
workingDirectory: $(BUILD_DIR)
|
workingDirectory: $(BUILD_DIR)
|
||||||
displayName: 'CMake'
|
displayName: 'CMake'
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ jobs:
|
|||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
set PATH=$(WORK_DIR)\ninja-win;%PATH%
|
||||||
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
call "$(MSVS_VARS_PATH)" && cmake -GNinja -DENABLE_FASTER_BUILD=ON -DENABLE_TEMPLATE_PLUGIN=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)\modules -DCMAKE_C_COMPILER:PATH="$(MSVC_COMPILER_PATH)" -DCMAKE_CXX_COMPILER:PATH="$(MSVC_COMPILER_PATH)" $(REPO_DIR)
|
||||||
workingDirectory: $(BUILD_DIR)
|
workingDirectory: $(BUILD_DIR)
|
||||||
displayName: 'CMake'
|
displayName: 'CMake'
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ ie_dependent_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON "X8
|
|||||||
|
|
||||||
ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
|
ie_option (ENABLE_TESTS "unit, behavior and functional tests" OFF)
|
||||||
|
|
||||||
ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" OFF)
|
ie_option (ENABLE_STRICT_DEPENDENCIES "Skip configuring \"convinient\" dependencies for efficient parallel builds" ON)
|
||||||
|
|
||||||
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
|
ie_dependent_option (ENABLE_CLDNN "clDnn based plugin for inference engine" ON "X86_64;NOT APPLE;NOT MINGW;NOT WINDOWS_STORE;NOT WINDOWS_PHONE" OFF)
|
||||||
|
|
||||||
|
@ -22,7 +22,8 @@ $ benchmark_app -m <model.xml> -enforcebf16=false
|
|||||||
Notice that for quantized (e.g. INT8) models the bfloat16 calculations (of the layers that remain in FP32) is disabled by default.
|
Notice that for quantized (e.g. INT8) models the bfloat16 calculations (of the layers that remain in FP32) is disabled by default.
|
||||||
Refer to the [CPU Plugin documentation](supported_plugins/CPU.md) for more details.
|
Refer to the [CPU Plugin documentation](supported_plugins/CPU.md) for more details.
|
||||||
|
|
||||||
Similarly, the GPU device has a dedicated config key to enable FP16 execution of the layers that remain in FP32 in the quantized models (as the quantization is typically performed on the FP32 models), refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/GPU.md)
|
Similarly, the GPU device automatically executes FP16 for the layers that remain in FP16 in the quantized models (assuming that the FP16 model was quantized).
|
||||||
|
Refer to the ENABLE_FP16_FOR_QUANTIZED_MODELS key in the [GPU Plugin documentation](supported_plugins/GPU.md).
|
||||||
|
|
||||||
## Latency vs. Throughput
|
## Latency vs. Throughput
|
||||||
One way to increase computational efficiency is batching, which combines many (potentially tens) of
|
One way to increase computational efficiency is batching, which combines many (potentially tens) of
|
||||||
|
@ -20,8 +20,8 @@
|
|||||||
* @brief Defines Inference Engine patch version
|
* @brief Defines Inference Engine patch version
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define IE_VERSION_MAJOR 2021
|
#define IE_VERSION_MAJOR 2022
|
||||||
#define IE_VERSION_MINOR 4
|
#define IE_VERSION_MINOR 1
|
||||||
#define IE_VERSION_PATCH 0
|
#define IE_VERSION_PATCH 0
|
||||||
|
|
||||||
#include "ie_api.h"
|
#include "ie_api.h"
|
||||||
|
@ -10,13 +10,18 @@
|
|||||||
namespace GNAPluginNS {
|
namespace GNAPluginNS {
|
||||||
namespace GNALimitations {
|
namespace GNALimitations {
|
||||||
|
|
||||||
|
constexpr uint32_t bufferMaxSize = 65528;
|
||||||
|
|
||||||
constexpr uint32_t convMinFiltersNum = 4;
|
constexpr uint32_t convMinFiltersNum = 4;
|
||||||
constexpr uint32_t convMaxFiltersNum = 65532;
|
constexpr uint32_t convMaxFiltersNum = 65532;
|
||||||
constexpr uint32_t convFiltersNumDivider = 4;
|
constexpr uint32_t convFiltersNumDivider = 4;
|
||||||
|
constexpr uint32_t convFilterMaxSize = 768;
|
||||||
constexpr uint32_t convEachKernelByteAlignment = 16;
|
constexpr uint32_t convEachKernelByteAlignment = 16;
|
||||||
constexpr uint32_t noOfInputsDivisor = 8;
|
constexpr uint32_t noOfInputsDivisor = 8;
|
||||||
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
|
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
|
||||||
|
|
||||||
|
constexpr uint32_t affineMaxBatchSize = 8;
|
||||||
|
|
||||||
namespace Cnn2D {
|
namespace Cnn2D {
|
||||||
struct RangeLimit {
|
struct RangeLimit {
|
||||||
uint32_t min;
|
uint32_t min;
|
||||||
|
@ -370,14 +370,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
|
|||||||
auto minOutValue = quantizedParams->_dst_quant.GetMinValues().front();
|
auto minOutValue = quantizedParams->_dst_quant.GetMinValues().front();
|
||||||
auto maxOutValue = quantizedParams->_dst_quant.GetMaxValues().front();
|
auto maxOutValue = quantizedParams->_dst_quant.GetMaxValues().front();
|
||||||
auto absMax = std::max(std::abs(minOutValue), std::abs(maxOutValue));
|
auto absMax = std::max(std::abs(minOutValue), std::abs(maxOutValue));
|
||||||
auto absMin = std::min(std::abs(minOutValue), std::abs(maxOutValue));
|
|
||||||
|
|
||||||
result = (quantizedParams->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue);
|
result = (quantizedParams->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue);
|
||||||
if (0 && fp32eq(absMin, 0.0f) && !fp32eq(absMax, 0.0f)) {
|
|
||||||
result = (quantizedParams->_dst_quant.GetLevels() - 1) / (2 * absMax);
|
|
||||||
}
|
|
||||||
//
|
|
||||||
//result = MAX_VAL_2B_FEAT / absMax;
|
|
||||||
if (std::isinf(result) || fp32eq(absMax, 0.0f)) {
|
if (std::isinf(result) || fp32eq(absMax, 0.0f)) {
|
||||||
result = max_activation_scale_factor;
|
result = max_activation_scale_factor;
|
||||||
}
|
}
|
||||||
@ -401,6 +395,7 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
|
|||||||
(layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) {
|
(layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) {
|
||||||
auto prevLayerQuant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*prevLayer);
|
auto prevLayerQuant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*prevLayer);
|
||||||
if (!fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) &&
|
if (!fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) &&
|
||||||
|
prevLayerQuant->_src_quant.IsStatsSet() &&
|
||||||
(prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) {
|
(prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) {
|
||||||
result = prevLayerQuant->_src_quant.GetScale();
|
result = prevLayerQuant->_src_quant.GetScale();
|
||||||
usePrevScaleFactor = true;
|
usePrevScaleFactor = true;
|
||||||
|
@ -158,25 +158,27 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
|
|||||||
THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect to functional layer";
|
THROW_GNA_LAYER_EXCEPTION(layer) << " outData["<< i << "]" << " connected by " << j <<" connection doesnt connect to functional layer";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto dataOutput = outFunctionalLayer.first->insData[outFunctionalLayer.second].lock();
|
for (int idx : outFunctionalLayer.second) {
|
||||||
|
auto dataOutput = outFunctionalLayer.first->insData[idx].lock();
|
||||||
|
|
||||||
padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize())
|
padding = std::max(padding, LayerInfo(outFunctionalLayer.first).paddingSize())
|
||||||
* dataOutput->getPrecision().size();
|
* dataOutput->getPrecision().size();
|
||||||
output_layer_size =
|
output_layer_size =
|
||||||
InferenceEngine::details::product(begin(dataOutput->getDims()),
|
InferenceEngine::details::product(begin(dataOutput->getDims()),
|
||||||
end(dataOutput->getDims())) * dataOutput->getPrecision().size();
|
end(dataOutput->getDims())) * dataOutput->getPrecision().size();
|
||||||
|
|
||||||
if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) {
|
if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) {
|
||||||
size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset");
|
size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset");
|
||||||
layerInfoItem.splitOutputLayers.emplace_back(
|
layerInfoItem.splitOutputLayers.emplace_back(
|
||||||
outFunctionalLayer.first,
|
outFunctionalLayer.first,
|
||||||
outFunctionalLayer.second,
|
idx,
|
||||||
aligned64_offset * dataOutput->getPrecision().size(),
|
aligned64_offset * dataOutput->getPrecision().size(),
|
||||||
output_layer_size);
|
output_layer_size);
|
||||||
} else {
|
} else {
|
||||||
layerInfoItem.splitOutputLayers.emplace_back(
|
layerInfoItem.splitOutputLayers.emplace_back(
|
||||||
outFunctionalLayer.first, outFunctionalLayer.second, split_size, output_layer_size);
|
outFunctionalLayer.first, idx, split_size, output_layer_size);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// in case of unconnected split - we need properly increment size
|
// in case of unconnected split - we need properly increment size
|
||||||
|
@ -155,14 +155,14 @@ inline InferenceEngine::CNNLayerPtr CNNNetPrevLayerSkipCertain(Layer layer, int
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
template <class Layer>
|
template <class Layer>
|
||||||
inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetCheckNextLayerSkipCertain(Layer layer, int oidx, int iidx, bool bOnlyCheck,
|
inline std::pair<InferenceEngine::CNNLayerPtr, std::vector<int>> CNNNetCheckNextLayerSkipCertain(Layer layer, int oidx, int iidx, bool bOnlyCheck,
|
||||||
const std::function<bool(CNNLayerPtr)> &shouldSkip) {
|
const std::function<bool(CNNLayerPtr)> &shouldSkip) {
|
||||||
if (oidx >= layer->outData.size()) {
|
if (oidx >= layer->outData.size()) {
|
||||||
if (bOnlyCheck) return {nullptr, 0};
|
if (bOnlyCheck) return {nullptr, {}};
|
||||||
THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx;
|
THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx;
|
||||||
}
|
}
|
||||||
if (getInputTo(layer->outData[oidx]).empty() || iidx >= getInputTo(layer->outData[oidx]).size()) {
|
if (getInputTo(layer->outData[oidx]).empty() || iidx >= getInputTo(layer->outData[oidx]).size()) {
|
||||||
if (bOnlyCheck) return {nullptr, 0};
|
if (bOnlyCheck) return {nullptr, {}};
|
||||||
THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx << " and inputTo index: " << iidx;
|
THROW_GNA_LAYER_EXCEPTION(layer) << " no next output layer for outdata: " << oidx << " and inputTo index: " << iidx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -174,12 +174,12 @@ inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetCheckNextLayerSkipCer
|
|||||||
|
|
||||||
while (shouldSkip(outLayer->second)) {
|
while (shouldSkip(outLayer->second)) {
|
||||||
if (outLayer->second->outData.size() <= new_oidx) {
|
if (outLayer->second->outData.size() <= new_oidx) {
|
||||||
if (bOnlyCheck) return { nullptr, 0 };
|
if (bOnlyCheck) return { nullptr, {} };
|
||||||
THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx;
|
THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getInputTo(outLayer->second->outData[new_oidx]).size() <= new_iidx) {
|
if (getInputTo(outLayer->second->outData[new_oidx]).size() <= new_iidx) {
|
||||||
if (bOnlyCheck) return { nullptr, 0 };
|
if (bOnlyCheck) return { nullptr, {} };
|
||||||
THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx << " and inputTo index: " << new_iidx;
|
THROW_GNA_LAYER_EXCEPTION(outLayer->second) << " no next output layer for outdata: " << new_oidx << " and inputTo index: " << new_iidx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,11 +188,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetCheckNextLayerSkipCer
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto insDataIdx = CNNLayerFindInsDataIdxes(layer->outData[new_oidx], outLayer->second);
|
auto insDataIdx = CNNLayerFindInsDataIdxes(layer->outData[new_oidx], outLayer->second);
|
||||||
if (insDataIdx.size() != 1) {
|
return { outLayer->second, insDataIdx };
|
||||||
if (bOnlyCheck) return { nullptr, 0 };
|
|
||||||
THROW_GNA_LAYER_EXCEPTION(layer) << " has multiple connection to " << new_oidx << " outData";
|
|
||||||
}
|
|
||||||
return { outLayer->second, insDataIdx.front() };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -256,7 +252,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetCheckNextLayerSkipCer
|
|||||||
|
|
||||||
/// @brief alias for strict checkNextLayer (false)
|
/// @brief alias for strict checkNextLayer (false)
|
||||||
template <class Layer>
|
template <class Layer>
|
||||||
inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetGetNextLayerSkipCertain(Layer layer, int oidx, int iidx,
|
inline std::pair<InferenceEngine::CNNLayerPtr, std::vector<int>> CNNNetGetNextLayerSkipCertain(Layer layer, int oidx, int iidx,
|
||||||
const std::function<bool(CNNLayerPtr)> &shouldSkip) {
|
const std::function<bool(CNNLayerPtr)> &shouldSkip) {
|
||||||
return CNNNetCheckNextLayerSkipCertain(layer, oidx, iidx, false, shouldSkip);
|
return CNNNetCheckNextLayerSkipCertain(layer, oidx, iidx, false, shouldSkip);
|
||||||
}
|
}
|
||||||
|
@ -46,14 +46,10 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
|
|||||||
* @param layer
|
* @param layer
|
||||||
*/
|
*/
|
||||||
inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
|
inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
|
||||||
if (GNAPluginNS::LayerInfo(layer).isPower())
|
if (GNAPluginNS::LayerInfo(layer).isPower() || GNAPluginNS::LayerInfo(layer).isCopy())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (!GNAPluginNS::LayerInfo(layer).isScaleShift())
|
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
|
||||||
return false;
|
|
||||||
|
|
||||||
// Don't reshape user-defined ScaleShift layers
|
|
||||||
if (layer->name.rfind("SyntheticScaleShift", 0) == std::string::npos)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Don't reshape the first dnn layer since it breaks groups recognition
|
// Don't reshape the first dnn layer since it breaks groups recognition
|
||||||
@ -61,8 +57,7 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
|
|||||||
return LayerInfo(ptr).isNonValuesChangable();
|
return LayerInfo(ptr).isNonValuesChangable();
|
||||||
});
|
});
|
||||||
IE_ASSERT(prevLayer != nullptr);
|
IE_ASSERT(prevLayer != nullptr);
|
||||||
if (LayerInfo(prevLayer).isInput())
|
if (LayerInfo(prevLayer).isInput()) return false;
|
||||||
return false;
|
|
||||||
|
|
||||||
// Don't reshape diagonallayers with bias connection
|
// Don't reshape diagonallayers with bias connection
|
||||||
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
|
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <mm_malloc.h>
|
#include <mm_malloc.h>
|
||||||
#include <serial/headers/2dot2/gna_model_header.hpp>
|
#include <serial/headers/2dot2/gna_model_header.hpp>
|
||||||
#include <serial/headers/2dot5/gna_model_header.hpp>
|
#include <serial/headers/2dot5/gna_model_header.hpp>
|
||||||
|
#include <serial/headers/2dot6/gna_model_header.hpp>
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -133,10 +134,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
|
|||||||
}
|
}
|
||||||
case 5:
|
case 5:
|
||||||
case 6:
|
case 6:
|
||||||
|
case 7:
|
||||||
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is);
|
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 4 and is: " << header.version.minor;
|
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << header.version.minor;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -154,6 +156,40 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
|
|||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
|
||||||
|
is.exceptions(std::istream::failbit);
|
||||||
|
|
||||||
|
HeaderLatest::RuntimeEndPoint endPoint;
|
||||||
|
switch (modelHeader.version.major) {
|
||||||
|
case 2:
|
||||||
|
switch (modelHeader.version.minor) {
|
||||||
|
case 1:
|
||||||
|
case 2:
|
||||||
|
case 3:
|
||||||
|
case 4:
|
||||||
|
case 5:
|
||||||
|
case 6:
|
||||||
|
{
|
||||||
|
Header2dot6::RuntimeEndPoint tempEndPoint2dot6;
|
||||||
|
readBits(tempEndPoint2dot6, is);
|
||||||
|
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, modelHeader.nGroup);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 7:
|
||||||
|
readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << modelHeader.version.minor;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: "
|
||||||
|
<< modelHeader.version.major << " is not implemented";
|
||||||
|
}
|
||||||
|
|
||||||
|
return endPoint;
|
||||||
|
}
|
||||||
|
|
||||||
#define offsetFromBase(field)\
|
#define offsetFromBase(field)\
|
||||||
getOffsetFromBase(field, #field)
|
getOffsetFromBase(field, #field)
|
||||||
|
|
||||||
@ -324,18 +360,6 @@ void GNAModelSerial::Import(void *basePointer,
|
|||||||
is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
|
is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t guessGrouping(Gna2Model const& model) {
|
|
||||||
if (model.NumberOfOperations == 0 ||
|
|
||||||
model.Operations == nullptr ||
|
|
||||||
model.Operations[0].Operands == nullptr ||
|
|
||||||
model.Operations[0].NumberOfOperands == 0 ||
|
|
||||||
model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2) {
|
|
||||||
THROW_GNA_EXCEPTION << "Can not guess grouping";
|
|
||||||
}
|
|
||||||
return (std::min)(model.Operations[0].Operands[0]->Shape.Dimensions[0], model.Operations[0].Operands[0]->Shape.Dimensions[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
|
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
|
||||||
os.exceptions(std::ostream::failbit);
|
os.exceptions(std::ostream::failbit);
|
||||||
|
|
||||||
@ -366,6 +390,9 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
|
|||||||
out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
|
out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
|
||||||
out.scaleFactor = ep.scaleFactor;
|
out.scaleFactor = ep.scaleFactor;
|
||||||
out.element_size = ep.element_size;
|
out.element_size = ep.element_size;
|
||||||
|
out.shape = ep.shape;
|
||||||
|
out.layout = ep.layout;
|
||||||
|
out.precision = ep.precision;
|
||||||
out.orientation = ep.orientation;
|
out.orientation = ep.orientation;
|
||||||
return out;
|
return out;
|
||||||
};
|
};
|
||||||
@ -381,7 +408,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
|
|||||||
header.headerSize = sizeof(HeaderLatest::ModelHeader);
|
header.headerSize = sizeof(HeaderLatest::ModelHeader);
|
||||||
header.gnaMemSize = gnaGraphSize;
|
header.gnaMemSize = gnaGraphSize;
|
||||||
header.layersCount = layers.size();
|
header.layersCount = layers.size();
|
||||||
header.nGroup = guessGrouping(*gna2Model);
|
header.nGroup = 1; // just to support the old models
|
||||||
header.nInputs = inputs.size();
|
header.nInputs = inputs.size();
|
||||||
header.nOutputs = outputs.size();
|
header.nOutputs = outputs.size();
|
||||||
header.nTransposeInputs = transposeInputsInfo.size();
|
header.nTransposeInputs = transposeInputsInfo.size();
|
||||||
@ -796,13 +823,22 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
|
|||||||
std::size_t outputIndex = 0;
|
std::size_t outputIndex = 0;
|
||||||
for (auto const &output : outputsDataMap) {
|
for (auto const &output : outputsDataMap) {
|
||||||
auto outputName = output.first;
|
auto outputName = output.first;
|
||||||
auto inputDims = output.second->getTensorDesc().getDims();
|
auto outputDims = output.second->getTensorDesc().getDims();
|
||||||
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
|
HeaderLatest::RuntimeEndPoint::Shape outputShape;
|
||||||
|
outputShape.NumberOfDimensions = outputDims.size();
|
||||||
|
for (size_t i=0; i < outputShape.NumberOfDimensions; ++i) {
|
||||||
|
outputShape.Dimensions[i] = static_cast<uint32_t>(outputDims[i]);
|
||||||
|
}
|
||||||
|
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
|
||||||
|
InferenceEngine::Layout outputLayout = output.second->getLayout();
|
||||||
|
InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
|
||||||
HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
|
HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
|
||||||
outputsDesc[outputIndex].ptrs[0],
|
outputsDesc[outputIndex].ptrs[0],
|
||||||
outputsDesc[outputIndex].num_bytes_per_element,
|
outputsDesc[outputIndex].num_bytes_per_element,
|
||||||
elementsCount,
|
elementsCount,
|
||||||
|
outputShape,
|
||||||
|
outputLayout,
|
||||||
|
outputPrecision,
|
||||||
outputsDesc[outputIndex].orientation);
|
outputsDesc[outputIndex].orientation);
|
||||||
endPoints.push_back(endPoint);
|
endPoints.push_back(endPoint);
|
||||||
outputIndex++;
|
outputIndex++;
|
||||||
@ -818,18 +854,26 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
|
|||||||
for (auto const& input : inputsDataMap) {
|
for (auto const& input : inputsDataMap) {
|
||||||
auto inputName = input.first;
|
auto inputName = input.first;
|
||||||
auto inputDims = input.second->getTensorDesc().getDims();
|
auto inputDims = input.second->getTensorDesc().getDims();
|
||||||
|
HeaderLatest::RuntimeEndPoint::Shape inputShape;
|
||||||
|
inputShape.NumberOfDimensions = inputDims.size();
|
||||||
|
for (size_t i=0; i < inputShape.NumberOfDimensions; ++i) {
|
||||||
|
inputShape.Dimensions[i] = static_cast<uint32_t>(inputDims[i]);
|
||||||
|
}
|
||||||
double scaleFactor = inputDesc->getScaleFactor(inputIndex);
|
double scaleFactor = inputDesc->getScaleFactor(inputIndex);
|
||||||
std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
|
std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
|
||||||
IE_ASSERT(descriptor_ptr.size() > 0);
|
IE_ASSERT(descriptor_ptr.size() > 0);
|
||||||
uint32_t element_size = 2u;
|
uint32_t element_size = 2u;
|
||||||
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
|
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
|
||||||
intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
|
intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
|
||||||
|
InferenceEngine::Layout inputLayout = input.second->getLayout();
|
||||||
|
InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
|
||||||
HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
|
HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
|
||||||
descriptor_ptr[0],
|
descriptor_ptr[0],
|
||||||
element_size,
|
element_size,
|
||||||
elementsCount,
|
elementsCount,
|
||||||
|
inputShape,
|
||||||
|
inputLayout,
|
||||||
|
inputPrecision,
|
||||||
orientation);
|
orientation);
|
||||||
endPoints.push_back(endPoint);
|
endPoints.push_back(endPoint);
|
||||||
inputIndex++;
|
inputIndex++;
|
||||||
@ -846,20 +890,24 @@ void GNAModelSerial::ImportInputs(std::istream &is,
|
|||||||
for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
|
for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
|
||||||
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
|
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
|
||||||
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
|
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
|
||||||
HeaderLatest::RuntimeEndPoint input;
|
|
||||||
is.read(reinterpret_cast<char *>(&input), sizeof(input));
|
HeaderLatest::RuntimeEndPoint input = ReadEndPoint(is);
|
||||||
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
|
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
|
||||||
inputsDesc->orientation_in[name] = input.orientation;
|
inputsDesc->orientation_in[name] = input.orientation;
|
||||||
inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
|
inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
|
||||||
|
|
||||||
auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
|
auto inputDims = InferenceEngine::SizeVector();
|
||||||
|
for (auto i = 0; i < input.shape.NumberOfDimensions; ++i) {
|
||||||
|
inputDims.push_back(input.shape.Dimensions[i]);
|
||||||
|
}
|
||||||
|
InferenceEngine::Layout inputLayout = static_cast<InferenceEngine::Layout>(input.layout);
|
||||||
|
InferenceEngine::Precision inputPresicion = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(input.precision));
|
||||||
dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
|
dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
|
||||||
dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
|
dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
|
||||||
InferenceEngine::TensorDesc(
|
InferenceEngine::TensorDesc(
|
||||||
InferenceEngine::Precision::FP32,
|
inputPresicion,
|
||||||
inputDims,
|
inputDims,
|
||||||
InferenceEngine::Layout::NC)));
|
inputLayout)));
|
||||||
inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
|
inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -875,8 +923,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
|
|||||||
for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
|
for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
|
||||||
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
|
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
|
||||||
? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
|
? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
|
||||||
HeaderLatest::RuntimeEndPoint output;
|
|
||||||
is.read(reinterpret_cast<char *>(&output), sizeof(output));
|
HeaderLatest::RuntimeEndPoint output = ReadEndPoint(is);
|
||||||
OutputDesc description;
|
OutputDesc description;
|
||||||
description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
|
description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
|
||||||
description.orientation = kDnnInterleavedOrientation;
|
description.orientation = kDnnInterleavedOrientation;
|
||||||
@ -884,12 +932,17 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
|
|||||||
description.num_bytes_per_element = output.element_size;
|
description.num_bytes_per_element = output.element_size;
|
||||||
description.scale_factor = output.scaleFactor;
|
description.scale_factor = output.scaleFactor;
|
||||||
|
|
||||||
auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
|
auto outputDims = InferenceEngine::SizeVector();
|
||||||
|
for (auto i = 0; i < output.shape.NumberOfDimensions; ++i) {
|
||||||
|
outputDims.push_back(output.shape.Dimensions[i]);
|
||||||
|
}
|
||||||
|
InferenceEngine::Layout outputLayout = static_cast<InferenceEngine::Layout>(output.layout);
|
||||||
|
InferenceEngine::Precision outputPresicion = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(output.precision));
|
||||||
dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
|
dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
|
||||||
InferenceEngine::TensorDesc(
|
InferenceEngine::TensorDesc(
|
||||||
InferenceEngine::Precision::FP32,
|
outputPresicion,
|
||||||
outputDims,
|
outputDims,
|
||||||
InferenceEngine::Layout::NC));
|
outputLayout));
|
||||||
desc.at(outputIndex) = description;
|
desc.at(outputIndex) = description;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -138,6 +138,8 @@ private:
|
|||||||
*/
|
*/
|
||||||
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
|
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
|
||||||
|
|
||||||
|
GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Import model from FS into preallocated buffer,
|
* @brief Import model from FS into preallocated buffer,
|
||||||
* buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free
|
* buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free
|
||||||
|
@ -54,12 +54,17 @@
|
|||||||
#include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
|
#include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
|
||||||
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
|
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
|
||||||
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
|
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
|
||||||
|
#include <transformations/utils/utils.hpp>
|
||||||
|
|
||||||
#include "transformations/remove_extra_reshapes.hpp"
|
#include "transformations/remove_extra_reshapes.hpp"
|
||||||
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
|
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
|
||||||
#include "transformations/insert_transpose_before_matmul.hpp"
|
#include "transformations/insert_transpose_before_matmul.hpp"
|
||||||
#include "transformations/reorder_activation_and_pooling.hpp"
|
#include "transformations/reorder_activation_and_pooling.hpp"
|
||||||
#include "transformations/swap_input_matmul_gna.hpp"
|
#include "transformations/swap_input_matmul_gna.hpp"
|
||||||
|
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
|
||||||
|
#include "transformations/split_convolution_with_large_buffer_size.hpp"
|
||||||
|
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
|
|
||||||
#if GNA_LIB_VER == 2
|
#if GNA_LIB_VER == 2
|
||||||
#include <gna2-model-api.h>
|
#include <gna2-model-api.h>
|
||||||
@ -667,6 +672,15 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
|||||||
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
|
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
|
||||||
manager.register_pass<ngraph::pass::ConvertPriorBox>();
|
manager.register_pass<ngraph::pass::ConvertPriorBox>();
|
||||||
manager.register_pass<ngraph::pass::CommonOptimizations>();
|
manager.register_pass<ngraph::pass::CommonOptimizations>();
|
||||||
|
// TODO enable this transformation for networks with convolutions
|
||||||
|
if (!ngraph::op::util::has_op_with_type<ngraph::opset7::Convolution>(graph)) {
|
||||||
|
manager.register_pass<ConvertMatmulWithFqToPointWiseConvolution>();
|
||||||
|
manager.register_pass<ConvertMatmulWithBiasToPointWiseConvolution>();
|
||||||
|
manager.register_pass<ConvertMatmulToPointWiseConvolution>();
|
||||||
|
}
|
||||||
|
manager.register_pass<SplitConvolutionWithFq>();
|
||||||
|
manager.register_pass<SplitConvolutionWithBias>();
|
||||||
|
manager.register_pass<SplitConvolution>();
|
||||||
manager.register_pass<InsertTransposeBeforeMatmul>();
|
manager.register_pass<InsertTransposeBeforeMatmul>();
|
||||||
manager.register_pass<SwapInputMatMul>();
|
manager.register_pass<SwapInputMatMul>();
|
||||||
manager.register_pass<InsertTransposeAfterConvOrPool>();
|
manager.register_pass<InsertTransposeAfterConvOrPool>();
|
||||||
@ -735,6 +749,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
|||||||
passes->registerPass<SubstitutePReluPass>();
|
passes->registerPass<SubstitutePReluPass>();
|
||||||
passes->registerPass<SubstituteSoftSignPass>();
|
passes->registerPass<SubstituteSoftSignPass>();
|
||||||
|
|
||||||
|
passes->registerPass<BroadcastConstPass>();
|
||||||
passes->registerPass<ReorderMaxPoolPass>();
|
passes->registerPass<ReorderMaxPoolPass>();
|
||||||
passes->registerPass<EltwiseSplitOverChannelsPass>();
|
passes->registerPass<EltwiseSplitOverChannelsPass>();
|
||||||
passes->registerPass<InsertSplitAligningFilterPass>();
|
passes->registerPass<InsertSplitAligningFilterPass>();
|
||||||
@ -753,7 +768,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
|||||||
|
|
||||||
passes->registerPass<InsertIdentityLayerPass>();
|
passes->registerPass<InsertIdentityLayerPass>();
|
||||||
passes->registerPass<BreakFusingOfOutputLayersPass>();
|
passes->registerPass<BreakFusingOfOutputLayersPass>();
|
||||||
passes->registerPass<BroadcastConstPass>();
|
|
||||||
passes->registerPass<InsertDiagonalLayerPass>();
|
passes->registerPass<InsertDiagonalLayerPass>();
|
||||||
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
|
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
|
||||||
#if GNA_LIB_VER == 2
|
#if GNA_LIB_VER == 2
|
||||||
@ -1465,7 +1479,11 @@ static InferenceEngine::Layout GetLayoutForDims(const InferenceEngine::SizeVecto
|
|||||||
Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) {
|
Blob::Ptr GNAPlugin::GetOutputBlob(const std::string& name, InferenceEngine::Precision precision) {
|
||||||
// need to have intermediate blob for interleave conversion
|
// need to have intermediate blob for interleave conversion
|
||||||
InferenceEngine::Blob::Ptr outputBlob;
|
InferenceEngine::Blob::Ptr outputBlob;
|
||||||
auto outputDims = outputsDataMap[name]->getTensorDesc().getDims();
|
auto outputDataIt = outputsDataMap.find(name);
|
||||||
|
if (outputDataIt == std::end(outputsDataMap)) {
|
||||||
|
THROW_GNA_EXCEPTION << "Output " << name << " isn't found";
|
||||||
|
}
|
||||||
|
auto outputDims = outputDataIt->second->getTensorDesc().getDims();
|
||||||
outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, GetLayoutForDims(outputDims)));
|
outputBlob = make_blob_with_precision(TensorDesc(precision, outputDims, GetLayoutForDims(outputDims)));
|
||||||
outputBlob->allocate();
|
outputBlob->allocate();
|
||||||
return outputBlob;
|
return outputBlob;
|
||||||
@ -1475,7 +1493,11 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
|
|||||||
InferenceEngine::Blob::Ptr inputBlob;
|
InferenceEngine::Blob::Ptr inputBlob;
|
||||||
// need to have intermediate blob for interleave conversion
|
// need to have intermediate blob for interleave conversion
|
||||||
// TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not
|
// TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not
|
||||||
auto inputDims = inputsDataMap[name]->getTensorDesc().getDims();
|
auto inputDataIt = inputsDataMap.find(name);
|
||||||
|
if (inputDataIt == std::end(inputsDataMap)) {
|
||||||
|
THROW_GNA_EXCEPTION << "Input " << name << " isn't found";
|
||||||
|
}
|
||||||
|
auto inputDims = inputDataIt->second->getTensorDesc().getDims();
|
||||||
inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, GetLayoutForDims(inputDims)));
|
inputBlob = make_blob_with_precision(TensorDesc(precision, inputDims, GetLayoutForDims(inputDims)));
|
||||||
inputBlob->allocate();
|
inputBlob->allocate();
|
||||||
return inputBlob;
|
return inputBlob;
|
||||||
|
@ -86,7 +86,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
|
|||||||
});
|
});
|
||||||
IE_ASSERT(inputLayer != nullptr);
|
IE_ASSERT(inputLayer != nullptr);
|
||||||
size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ?
|
size_t weightsSize = (LayerInfo(prevLayer).has32BOutput() || LayerInfo(inputLayer).isInput()) ?
|
||||||
weightsSize = nextLayer->outData[0]->getDims().back() :
|
nextLayer->outData[0]->getDims().back() :
|
||||||
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
|
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1];
|
||||||
std::vector<float> weightsValues(weightsSize, fillValue);
|
std::vector<float> weightsValues(weightsSize, fillValue);
|
||||||
IE_ASSERT(diagLayer != nullptr);
|
IE_ASSERT(diagLayer != nullptr);
|
||||||
@ -314,6 +314,7 @@ void HandleMultipleActivationsForTheLayerPass::run() {
|
|||||||
LayerInfo info(inputTo.second);
|
LayerInfo info(inputTo.second);
|
||||||
|
|
||||||
if (info.isActivation()) {
|
if (info.isActivation()) {
|
||||||
|
if (odata->getDims().empty()) continue;
|
||||||
if (!activations.empty() && odata->getDims()[0] != 1) {
|
if (!activations.empty() && odata->getDims()[0] != 1) {
|
||||||
THROW_GNA_EXCEPTION << "Unsupported batch size " << odata->getDims()[0]
|
THROW_GNA_EXCEPTION << "Unsupported batch size " << odata->getDims()[0]
|
||||||
<< " for diagonal layer insertion";
|
<< " for diagonal layer insertion";
|
||||||
@ -741,12 +742,17 @@ void RemovePermutationsNHWCToNCHWPass::run() {
|
|||||||
IE_ASSERT(!input_to.empty());
|
IE_ASSERT(!input_to.empty());
|
||||||
auto current_layer = input_to.begin()->second;
|
auto current_layer = input_to.begin()->second;
|
||||||
setNHWCOrder(current_layer->input());
|
setNHWCOrder(current_layer->input());
|
||||||
while (current_layer != pattern_end) {
|
std::function<void(CNNLayerPtr)> propogateNHWCOrderRecursive =
|
||||||
setNHWCOrder(current_layer->outData[0]);
|
[pattern_end, &propogateNHWCOrderRecursive, &setNHWCOrder](CNNLayerPtr current_layer) {
|
||||||
input_to = getInputTo(current_layer->outData[0]);
|
if (current_layer == pattern_end) return;
|
||||||
IE_ASSERT(!input_to.empty());
|
for (size_t i = 0; i < current_layer->outData.size(); ++i) {
|
||||||
current_layer = input_to.begin()->second;
|
setNHWCOrder(current_layer->outData[i]);
|
||||||
}
|
auto input_to = getInputTo(current_layer->outData[i]);
|
||||||
|
IE_ASSERT(!input_to.empty());
|
||||||
|
propogateNHWCOrderRecursive(input_to.begin()->second);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
propogateNHWCOrderRecursive(current_layer);
|
||||||
|
|
||||||
if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) {
|
if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) {
|
||||||
auto layer_before_permute = CNNNetPrevLayer(pattern_start);
|
auto layer_before_permute = CNNNetPrevLayer(pattern_start);
|
||||||
@ -1447,21 +1453,19 @@ void EltwiseSplitOverChannelsPass::run() {
|
|||||||
THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
|
THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
|
||||||
}
|
}
|
||||||
auto oData = l->outData.front();
|
auto oData = l->outData.front();
|
||||||
|
auto out_width = GetDataDimSize(oData, DataDimName::W);
|
||||||
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
|
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
|
||||||
auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo;
|
auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo;
|
||||||
if (totalElementsForOutput <= maxAffineElements) {
|
if (totalElementsForOutput <= maxAffineElements) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: for now lets put split of 2 elements as restrictions
|
|
||||||
auto totalSplits = 1 + totalElementsForOutput / maxAffineElements;
|
auto totalSplits = 1 + totalElementsForOutput / maxAffineElements;
|
||||||
if (totalSplits > 2) {
|
|
||||||
THROW_GNA_LAYER_EXCEPTION(l) << "split layer over output channels on more than 2 layers unsupported";
|
|
||||||
}
|
|
||||||
|
|
||||||
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
|
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
|
||||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
|
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
|
||||||
|
|
||||||
|
bool sameInputs = l->insData[0].lock() == l->insData[1].lock();
|
||||||
std::vector<CNNLayerPtr> splitLayers(2);
|
std::vector<CNNLayerPtr> splitLayers(2);
|
||||||
for (size_t kThEltwiseInput = 0; kThEltwiseInput != 2; kThEltwiseInput++) {
|
for (size_t kThEltwiseInput = 0; kThEltwiseInput != 2; kThEltwiseInput++) {
|
||||||
// create split layer
|
// create split layer
|
||||||
@ -1472,31 +1476,38 @@ void EltwiseSplitOverChannelsPass::run() {
|
|||||||
|
|
||||||
split->insData.push_back(l->insData[kThEltwiseInput]);
|
split->insData.push_back(l->insData[kThEltwiseInput]);
|
||||||
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
|
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
|
||||||
// need to split this desc
|
|
||||||
if (inputDesc.getLayout() != Layout::NC) {
|
|
||||||
THROW_GNA_LAYER_EXCEPTION(l)
|
|
||||||
<< "cannot split over channel: input " << std::to_string(kThEltwiseInput)
|
|
||||||
<< " layout need to be NC";
|
|
||||||
}
|
|
||||||
|
|
||||||
// create split layer outputs
|
// create split layer outputs
|
||||||
for (size_t i = 0;; i++) {
|
size_t usedElements = 0;
|
||||||
auto elements_num = std::min(totalElementsForOutput - i * maxAffineElements,
|
for (size_t i = 0; i < totalSplits; i++) {
|
||||||
|
SizeVector newDims;
|
||||||
|
size_t elements_num = std::min(totalElementsForOutput - usedElements,
|
||||||
static_cast<size_t>(maxAffineElements));
|
static_cast<size_t>(maxAffineElements));
|
||||||
|
if (inputDesc.getDims().size() == 2) {
|
||||||
|
newDims = SizeVector{1, elements_num};
|
||||||
|
} else {
|
||||||
|
elements_num = elements_num - elements_num % out_width;
|
||||||
|
newDims = SizeVector{1, elements_num / out_width, out_width};
|
||||||
|
}
|
||||||
|
|
||||||
SizeVector newDims = {1, elements_num};
|
|
||||||
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
|
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
|
||||||
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
|
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
|
||||||
getCreatorLayer(data) = split;
|
getCreatorLayer(data) = split;
|
||||||
split->outData.push_back(data);
|
split->outData.push_back(data);
|
||||||
|
|
||||||
if (elements_num != maxAffineElements) {
|
usedElements += elements_num;
|
||||||
|
if (usedElements == totalElementsForOutput) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// replacing connection X->eltwise to X->split
|
// replacing connection X->eltwise to X->split
|
||||||
auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
|
auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
|
||||||
oData.second->second = split;
|
oData.second->second = split;
|
||||||
|
|
||||||
|
if (sameInputs) {
|
||||||
|
splitLayers[1] = splitLayers[0];
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// create concatlayer
|
// create concatlayer
|
||||||
@ -1507,8 +1518,6 @@ void EltwiseSplitOverChannelsPass::run() {
|
|||||||
concat->outData.push_back(masterEltwise->outData.front());
|
concat->outData.push_back(masterEltwise->outData.front());
|
||||||
getCreatorLayer(masterEltwise->outData.front()) = concat;
|
getCreatorLayer(masterEltwise->outData.front()) = concat;
|
||||||
|
|
||||||
|
|
||||||
// create new eltwise layers - here 2 hardcode
|
|
||||||
for (size_t k = 0; k != totalSplits; k++) {
|
for (size_t k = 0; k != totalSplits; k++) {
|
||||||
auto eltwiseRaw = std::make_shared<EltwiseLayer>(
|
auto eltwiseRaw = std::make_shared<EltwiseLayer>(
|
||||||
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
|
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
|
||||||
@ -1517,7 +1526,6 @@ void EltwiseSplitOverChannelsPass::run() {
|
|||||||
eltwiseRaw->coeff = masterEltwise->coeff;
|
eltwiseRaw->coeff = masterEltwise->coeff;
|
||||||
auto eltwise = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(eltwiseRaw) : eltwiseRaw;
|
auto eltwise = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(eltwiseRaw) : eltwiseRaw;
|
||||||
|
|
||||||
|
|
||||||
eltwise->insData.push_back(splitLayers[0]->outData[k]);
|
eltwise->insData.push_back(splitLayers[0]->outData[k]);
|
||||||
eltwise->insData.push_back(splitLayers[1]->outData[k]);
|
eltwise->insData.push_back(splitLayers[1]->outData[k]);
|
||||||
getInputTo(splitLayers[0]->outData[k])[eltwise->name] = eltwise;
|
getInputTo(splitLayers[0]->outData[k])[eltwise->name] = eltwise;
|
||||||
@ -1529,6 +1537,15 @@ void EltwiseSplitOverChannelsPass::run() {
|
|||||||
auto data = std::make_shared<Data>(l->name + "/elwise/out/" + std::to_string(k), newDesc);
|
auto data = std::make_shared<Data>(l->name + "/elwise/out/" + std::to_string(k), newDesc);
|
||||||
getCreatorLayer(data) = eltwise;
|
getCreatorLayer(data) = eltwise;
|
||||||
eltwise->outData.push_back(data);
|
eltwise->outData.push_back(data);
|
||||||
|
if (quantized) {
|
||||||
|
auto eltwiseQuant = InferenceEngine::getInjectedData<QuantizedLayerParams>(eltwise);
|
||||||
|
if (quantized->_src_quant.IsStatsSet()) {
|
||||||
|
eltwiseQuant->_src_quant.CopyStats(quantized->_src_quant);
|
||||||
|
}
|
||||||
|
if (quantized->_dst_quant.IsStatsSet()) {
|
||||||
|
eltwiseQuant->_dst_quant.CopyStats(quantized->_dst_quant);
|
||||||
|
}
|
||||||
|
}
|
||||||
getInputTo(data)[concat->name] = concat;
|
getInputTo(data)[concat->name] = concat;
|
||||||
concat->insData.push_back(data);
|
concat->insData.push_back(data);
|
||||||
}
|
}
|
||||||
@ -1919,13 +1936,20 @@ void FuseFQIntoWeightsPass::run() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
GNAFakeQuantizeLayer gnaFakeQuantizeLayer(fqLayer);
|
GNAFakeQuantizeLayer gnaFakeQuantizeLayer(fqLayer);
|
||||||
size_t layers_connected_to_fq_count = getInputTo(fqLayer->outData[0]).size();
|
auto inputTo = getInputTo(fqLayer->outData[0]);
|
||||||
|
size_t layers_connected_to_fq_count = inputTo.size();
|
||||||
|
auto layerBeforeWeightable = fqLayer;
|
||||||
|
while (layers_connected_to_fq_count == 1 && LayerInfo(inputTo.begin()->second).isNonFunctional()) {
|
||||||
|
layerBeforeWeightable = inputTo.begin()->second;
|
||||||
|
inputTo = getInputTo(layerBeforeWeightable->outData[0]);
|
||||||
|
layers_connected_to_fq_count = inputTo.size();
|
||||||
|
}
|
||||||
for (int index = 0; index < layers_connected_to_fq_count; index++) {
|
for (int index = 0; index < layers_connected_to_fq_count; index++) {
|
||||||
auto weightableLayer = CNNNetGetNextLayerSkipCertain(fqLayer, 0, index, isNonFunctional).first;
|
auto weightableLayer = CNNNetGetNextLayerSkipCertain(layerBeforeWeightable, 0, index, isNonFunctional).first;
|
||||||
if (!LayerInfo(weightableLayer).isWeightable()) {
|
if (!LayerInfo(weightableLayer).isWeightable()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (weightableLayer->insData.size() != 3) {
|
if (weightableLayer->insData.size() < 2) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1942,7 +1966,8 @@ void FuseFQIntoWeightsPass::run() {
|
|||||||
pass_trace() << "found " << LAYER_NAME(fqLayer) << " that will be converted to weights of "
|
pass_trace() << "found " << LAYER_NAME(fqLayer) << " that will be converted to weights of "
|
||||||
<< LAYER_NAME(weightableLayer) << "\n";
|
<< LAYER_NAME(weightableLayer) << "\n";
|
||||||
|
|
||||||
auto biases = LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx);
|
auto biases = weightableLayer->insData.size() == 3 ?
|
||||||
|
LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
|
||||||
auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData();
|
auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData();
|
||||||
|
|
||||||
// 1. broke existing connections - by detaching fq subgraph from rest of graph
|
// 1. broke existing connections - by detaching fq subgraph from rest of graph
|
||||||
@ -2149,8 +2174,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
|||||||
}
|
}
|
||||||
GNAFakeQuantizeLayer fqLayer(l);
|
GNAFakeQuantizeLayer fqLayer(l);
|
||||||
auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, donotSkip);
|
auto prevLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, donotSkip);
|
||||||
if (prevLayer->outData.size() != 1) {
|
auto prevDataIt = std::find_if(std::begin(prevLayer->outData), std::end(prevLayer->outData), [l](DataPtr data) {
|
||||||
THROW_GNA_LAYER_EXCEPTION(prevLayer) << " fake quantize input that connected to something else not supported";
|
return getInputTo(data).find(l->name) != std::end(getInputTo(data));
|
||||||
|
});
|
||||||
|
if (prevDataIt == std::end(prevLayer->outData)) {
|
||||||
|
THROW_GNA_LAYER_EXCEPTION(fqLayer) << "Invalid connection between " << prevLayer->name << " and " << l->name;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto inputRange = fqLayer.getInputRange();
|
auto inputRange = fqLayer.getInputRange();
|
||||||
@ -2181,8 +2209,18 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
|||||||
quantParamsPrevLayer->_dst_quant.SetMinValues({ outputRange.first[0] }, false);
|
quantParamsPrevLayer->_dst_quant.SetMinValues({ outputRange.first[0] }, false);
|
||||||
quantParamsPrevLayer->_dst_quant.SetMaxValues({ outputRange.second[0] }, false);
|
quantParamsPrevLayer->_dst_quant.SetMaxValues({ outputRange.second[0] }, false);
|
||||||
|
|
||||||
|
// Propagate destination statistics to the multiply layer if they are set for the following sum/sub layer (which is treated as a bias)
|
||||||
|
if (LayerInfo(prevLayer).isEltwiseSum() || LayerInfo(prevLayer).isEltwiseSub()) {
|
||||||
|
auto eltwPrevLayer = CNNNetPrevLayerSkipCertain(prevLayer, 0, donotSkip);
|
||||||
|
auto constLayer = CNNNetPrevLayerSkipCertain(prevLayer, 1, donotSkip);
|
||||||
|
if (LayerInfo(eltwPrevLayer).isEltwise() && LayerInfo(constLayer).isConst()) {
|
||||||
|
auto quantParamsEltwLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(eltwPrevLayer);
|
||||||
|
quantParamsEltwLayer->_dst_quant.CopyStats(quantParamsPrevLayer->_dst_quant);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto fqQauntParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
|
auto fqQauntParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
|
||||||
fqQauntParams->_dst_quant.SetLevels(fqLevels);
|
fqQauntParams->_dst_quant.SetLevels(UINT16_MAX);
|
||||||
fqQauntParams->_dst_quant.SetMinValues({ inputRange.first[0] }, true);
|
fqQauntParams->_dst_quant.SetMinValues({ inputRange.first[0] }, true);
|
||||||
fqQauntParams->_dst_quant.SetMaxValues({ inputRange.second[0] }, true);
|
fqQauntParams->_dst_quant.SetMaxValues({ inputRange.second[0] }, true);
|
||||||
fqQauntParams->_dst_quant.SetMinValues({ outputRange.first[0] }, false);
|
fqQauntParams->_dst_quant.SetMinValues({ outputRange.first[0] }, false);
|
||||||
@ -2198,7 +2236,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
|||||||
// FQ Layer is fused only when previous layer is const, memory or activation layer
|
// FQ Layer is fused only when previous layer is const, memory or activation layer
|
||||||
// or a next layer is activation layer.
|
// or a next layer is activation layer.
|
||||||
bool isFQFuseAllowed = allowFQFuse(l);
|
bool isFQFuseAllowed = allowFQFuse(l);
|
||||||
auto prevData = prevLayer->outData.front();
|
auto prevData = *prevDataIt;
|
||||||
|
|
||||||
// Find all output layers connected to FQ
|
// Find all output layers connected to FQ
|
||||||
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip);
|
auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip);
|
||||||
@ -2207,7 +2245,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isFQFuseAllowed) {
|
if (isFQFuseAllowed) {
|
||||||
getInputTo(prevLayer->outData.front()).clear();
|
getInputTo(prevData).clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connect all next layers after FQ to the layer that is before FQ
|
// Connect all next layers after FQ to the layer that is before FQ
|
||||||
@ -2222,7 +2260,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
|
|||||||
for (int insDataIdx : insDatas) {
|
for (int insDataIdx : insDatas) {
|
||||||
nextLayers[i]->insData[insDataIdx] = prevData;
|
nextLayers[i]->insData[insDataIdx] = prevData;
|
||||||
}
|
}
|
||||||
getInputTo(prevLayer->outData.front())[nextLayers[i]->name] = nextLayers[i];
|
getInputTo(prevData)[nextLayers[i]->name] = nextLayers[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
propagateStatistics(quantParamsPrevLayer, nextLayers[i]);
|
propagateStatistics(quantParamsPrevLayer, nextLayers[i]);
|
||||||
|
@ -0,0 +1,197 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <map>
|
||||||
|
#include "backend/dnn_types.h"
|
||||||
|
#include "serial/headers/2dot4/gna_model_header.hpp"
|
||||||
|
#include "serial/headers/2dot6/gna_model_header.hpp"
|
||||||
|
#include "serial/headers/latest/gna_model_header.hpp"
|
||||||
|
#include "gna_data_types.hpp"
|
||||||
|
|
||||||
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
|
namespace GNAPluginNS {
|
||||||
|
namespace Header2dot7 {
|
||||||
|
|
||||||
|
/**
|
||||||
|
Maximal number of supported shape dimensions.
|
||||||
|
*/
|
||||||
|
#define GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS 8
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Header version 2.7
|
||||||
|
*/
|
||||||
|
struct ModelHeader {
|
||||||
|
/**
|
||||||
|
*@brief MagicNumber – GNAM in ascii table, equals to hex 0x474e414d
|
||||||
|
*/
|
||||||
|
char gnam[4] = {};
|
||||||
|
/**
|
||||||
|
* @brief if header size is not equal to sizeof ModelHeader - some reserved data append in the end of header
|
||||||
|
* usually it is an indicator of working with version of model different that is current export function produce
|
||||||
|
*/
|
||||||
|
uint32_t headerSize = 0u;
|
||||||
|
struct Version {
|
||||||
|
/**
|
||||||
|
* @details Version of format Major – unsigned int, ex: 0x0001
|
||||||
|
* every change in the header or in the layers definition should be reflected in version change
|
||||||
|
* for backward compatibility new parsers can read old versions of model with certain restrictions
|
||||||
|
*/
|
||||||
|
uint16_t major = 2u;
|
||||||
|
/**
|
||||||
|
* @details Version of Format Minor – unsigned int, corresponding to build revision for example
|
||||||
|
* changes in minor version are not affected layout of model
|
||||||
|
*/
|
||||||
|
uint32_t minor = 7u;
|
||||||
|
} version;
|
||||||
|
/**
|
||||||
|
* @brief Memory required to be allocated using GNAAlloc()
|
||||||
|
*/
|
||||||
|
uint64_t gnaMemSize = 0ull;
|
||||||
|
/**
|
||||||
|
* @brief Number of GNA Layers
|
||||||
|
*/
|
||||||
|
uint64_t layersCount = 0ull;
|
||||||
|
/**
|
||||||
|
* @brief Grouping level
|
||||||
|
* This is depricted field and used for old models only (<=2.6)
|
||||||
|
*/
|
||||||
|
uint32_t nGroup = 0u;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convolution related setting - they are affecting input transformation
|
||||||
|
*/
|
||||||
|
uint32_t nRotateRows = 0u;
|
||||||
|
uint32_t nRotateColumns = 0u;
|
||||||
|
bool doRotateInput = false;
|
||||||
|
|
||||||
|
uint32_t nInputs = 0u;
|
||||||
|
uint32_t nOutputs = 0u;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convolution related setting - they are affecting output transformation
|
||||||
|
*/
|
||||||
|
uint32_t nRotateOutputRows = 0u;
|
||||||
|
uint32_t nRotateOutputColumns = 0u;
|
||||||
|
bool doRotateOutput = false;
|
||||||
|
|
||||||
|
uint32_t nTransposeInputs = 0u;
|
||||||
|
uint32_t nTransposeOutputs = 0u;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reserved Data might be here
|
||||||
|
*/
|
||||||
|
ModelHeader() = default;
|
||||||
|
ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {
|
||||||
|
gnaMemSize = old.gnaMemSize;
|
||||||
|
layersCount = old.layersCount;
|
||||||
|
nGroup = old.nGroup;
|
||||||
|
nRotateRows = old.nRotateRows;
|
||||||
|
nRotateColumns = old.nRotateColumns;
|
||||||
|
nInputs = old.nInputs;
|
||||||
|
nOutputs = old.nOutputs;
|
||||||
|
version.minor = old.version.minor;
|
||||||
|
}
|
||||||
|
ModelHeader(GNAPluginNS::Header2dot4::ModelHeader const &old) {
|
||||||
|
gnaMemSize = old.gnaMemSize;
|
||||||
|
layersCount = old.layersCount;
|
||||||
|
nGroup = old.nGroup;
|
||||||
|
nRotateRows = old.nRotateRows;
|
||||||
|
nRotateColumns = old.nRotateColumns;
|
||||||
|
nInputs = old.nInputs;
|
||||||
|
nOutputs = old.nOutputs;
|
||||||
|
nRotateOutputRows = old.nRotateOutputRows;
|
||||||
|
nRotateOutputColumns = old.nRotateOutputColumns;
|
||||||
|
doRotateOutput = old.doRotateOutput;
|
||||||
|
version.minor = old.version.minor;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At runtime the endpoint is mostly the same as in the serialized version, except for the descriptor field
|
||||||
|
*/
|
||||||
|
struct RuntimeEndPoint {
|
||||||
|
/**
|
||||||
|
* if scale factor is different then pased into infer , network might need to be requantized
|
||||||
|
*/
|
||||||
|
float scaleFactor = 0;
|
||||||
|
/**
|
||||||
|
* Pointer descriptor
|
||||||
|
*/
|
||||||
|
void* descriptor_ptr = nullptr;
|
||||||
|
/**
|
||||||
|
* Endpoint resolution in bytes.
|
||||||
|
*/
|
||||||
|
uint32_t element_size = 0;
|
||||||
|
/**
|
||||||
|
* Number of elements
|
||||||
|
*/
|
||||||
|
uint32_t elements_count = 0;
|
||||||
|
/**
|
||||||
|
* Offset in bytes of pointer descriptor
|
||||||
|
*/
|
||||||
|
uint64_t descriptor_offset = 0ull;
|
||||||
|
/**
|
||||||
|
Shape specifying dimension values.
|
||||||
|
*/
|
||||||
|
struct Shape {
|
||||||
|
/**
|
||||||
|
Number of dimensions or rank or order.
|
||||||
|
*/
|
||||||
|
uint32_t NumberOfDimensions = 0;
|
||||||
|
/**
|
||||||
|
array specifying value of each dimension.
|
||||||
|
Set all zeros for scalars.
|
||||||
|
*/
|
||||||
|
uint32_t Dimensions[GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS] = {0};
|
||||||
|
} shape;
|
||||||
|
/**
|
||||||
|
* Blob layout
|
||||||
|
*/
|
||||||
|
uint8_t layout = InferenceEngine::Layout::NC;
|
||||||
|
/**
|
||||||
|
* Blob precision
|
||||||
|
*/
|
||||||
|
uint8_t precision = InferenceEngine::Precision::FP32;
|
||||||
|
|
||||||
|
intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
|
||||||
|
|
||||||
|
RuntimeEndPoint() = default;
|
||||||
|
RuntimeEndPoint(const GNAPluginNS::Header2dot6::RuntimeEndPoint &old, uint32_t ngroup) {
|
||||||
|
scaleFactor = old.scaleFactor;
|
||||||
|
descriptor_ptr = old.descriptor_ptr;
|
||||||
|
element_size = old.element_size;
|
||||||
|
elements_count = old.elements_count;
|
||||||
|
orientation = old.orientation;
|
||||||
|
layout = InferenceEngine::Layout::NC;
|
||||||
|
precision = InferenceEngine::Precision::FP32;
|
||||||
|
descriptor_offset = old.descriptor_offset;
|
||||||
|
InferenceEngine::SizeVector dims = {ngroup, elements_count / ngroup};
|
||||||
|
shape.NumberOfDimensions = static_cast<uint32_t>(dims.size());
|
||||||
|
for (auto i = 0; i < dims.size(); i++) {
|
||||||
|
shape.Dimensions[i] = dims[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RuntimeEndPoint(double scaleFactor,
|
||||||
|
void* descriptor_ptr,
|
||||||
|
uint32_t element_size,
|
||||||
|
uint32_t elements_count,
|
||||||
|
Shape shape,
|
||||||
|
uint8_t layout,
|
||||||
|
uint8_t precision,
|
||||||
|
intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
|
||||||
|
descriptor_ptr(descriptor_ptr),
|
||||||
|
element_size(element_size),
|
||||||
|
elements_count(elements_count),
|
||||||
|
shape(shape),
|
||||||
|
layout(layout),
|
||||||
|
precision(precision),
|
||||||
|
orientation(orientation) { }
|
||||||
|
};
|
||||||
|
} // namespace Header2dot7
|
||||||
|
} // namespace GNAPluginNS
|
@ -4,11 +4,11 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "serial/headers/2dot6/gna_model_header.hpp"
|
#include "serial/headers/2dot7/gna_model_header.hpp"
|
||||||
|
|
||||||
namespace GNAPluginNS {
|
namespace GNAPluginNS {
|
||||||
namespace HeaderLatest {
|
namespace HeaderLatest {
|
||||||
using ModelHeader = GNAPluginNS::Header2dot6::ModelHeader;
|
using ModelHeader = GNAPluginNS::Header2dot7::ModelHeader;
|
||||||
using RuntimeEndPoint = GNAPluginNS::Header2dot6::RuntimeEndPoint;
|
using RuntimeEndPoint = GNAPluginNS::Header2dot7::RuntimeEndPoint;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,180 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
|
||||||
|
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
|
#include <ngraph/pattern/op/or.hpp>
|
||||||
|
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||||
|
|
||||||
|
#include "layers/gna_permute.hpp"
|
||||||
|
#include "backend/gna_limitations.hpp"
|
||||||
|
|
||||||
|
using namespace GNAPluginNS;
|
||||||
|
|
||||||
|
NGRAPH_RTTI_DEFINITION(ConvertMatmulToPointWiseConvolution, "ConvertMatmulToPointWiseConvolution", 0);
|
||||||
|
NGRAPH_RTTI_DEFINITION(ConvertMatmulWithBiasToPointWiseConvolution, "ConvertMatmulWithBiasToPointWiseConvolution", 0);
|
||||||
|
NGRAPH_RTTI_DEFINITION(ConvertMatmulWithFqToPointWiseConvolution, "ConvertMatmulWithFqToPointWiseConvolution", 0);
|
||||||
|
|
||||||
|
// Decides whether a MatMul should be replaced by a point-wise convolution and extracts
// the convolution parameters from its shapes.
// Returns {true, width, in_channels, out_channels} when the conversion applies and
// {false, 0, 0, 0} otherwise.
static std::tuple<bool, uint32_t, uint32_t, uint32_t> VerifyAndGetConvParams(std::shared_ptr<ngraph::Node> matmul_node) {
    using ConvParams = std::tuple<bool, uint32_t, uint32_t, uint32_t>;

    auto lhs_shape = matmul_node->get_input_shape(0);
    const auto rhs_shape = matmul_node->get_input_shape(1);
    const auto result_shape = matmul_node->get_output_shape(0);

    // A 3D first input with a leading unit dimension is handled as a 2D one.
    const bool has_leading_unit_dim = (lhs_shape.size() == 3) && (lhs_shape.front() == 1);
    if (has_leading_unit_dim) {
        lhs_shape.erase(lhs_shape.begin());
    }

    const bool ranks_supported = (lhs_shape.size() == 2) && (rhs_shape.size() == 2) && (result_shape.size() >= 2);
    if (!ranks_supported) {
        return ConvParams{false, 0, 0, 0};
    }

    // Check if MatMul or corresponding pointwise convolution are supported by GNA
    const uint32_t width = lhs_shape.front();
    const uint32_t in_channels = rhs_shape.back();
    const uint32_t out_channels = rhs_shape.front();

    // The conversion is only worthwhile when the first dimension exceeds the affine batch
    // limit, and only possible when the resulting filter fits the convolution limits.
    const bool convertible = lhs_shape.front() > GNALimitations::affineMaxBatchSize &&
                             out_channels % GNALimitations::convFiltersNumDivider == 0 &&
                             out_channels <= GNALimitations::convMaxFiltersNum &&
                             in_channels <= GNALimitations::convFilterMaxSize;
    if (!convertible) {
        return ConvParams{false, 0, 0, 0};
    }

    return ConvParams{true, width, in_channels, out_channels};
}
|
||||||
|
|
||||||
|
// Replaces a MatMul (optionally followed by a bias Add and/or a FakeQuantize) with
//   Reshape -> Transpose -> 1x1 Convolution [-> Add] [-> FakeQuantize] -> Transpose -> Reshape.
//
// matmul_node - the matched MatMul
// add         - the Add that follows the MatMul (replaced instead of the MatMul when bias is set)
// bias        - constant second input of that Add, or nullptr when there is no bias
// fq          - FakeQuantize that follows the MatMul/Add, or nullptr
//
// Returns false without touching the graph when VerifyAndGetConvParams rejects the MatMul,
// true after the replacement has been made.
static bool Convert(std::shared_ptr<ngraph::Node> matmul_node,
                    std::shared_ptr<ngraph::Node> add,
                    std::shared_ptr<ngraph::Node> bias,
                    std::shared_ptr<ngraph::Node> fq) {
    bool supported;
    uint32_t width, in_channels, out_channels;
    std::tie(supported, width, in_channels, out_channels) = VerifyAndGetConvParams(matmul_node);
    if (!supported) return false;

    // Keep the producer outputs themselves (not just the producer nodes): converting a node
    // pointer back to an Output picks the node's default output, which is the wrong one when
    // the MatMul is fed from a non-default output of a multi-output operation.
    const ngraph::Output<ngraph::Node> input = matmul_node->input_value(0);
    const ngraph::Output<ngraph::Node> weights = matmul_node->input_value(1);
    auto base_name = matmul_node->get_friendly_name();

    // [.., width, in_channels] -> [1, 1, width, in_channels]
    auto reshape_const_before = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
                                                                           ngraph::Shape{4},
                                                                           ngraph::Shape{1, 1, width, in_channels});
    auto reshape_before = std::make_shared<ngraph::opset7::Reshape>(input, reshape_const_before, false);
    reshape_before->set_friendly_name(base_name + "/reshape_in");

    auto transpose_before = std::make_shared<ngraph::opset7::Transpose>(reshape_before,
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
        GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW)));
    transpose_before->set_friendly_name(base_name + "/transpose_in");

    // MatMul weights become a 1x1 kernel: [out_channels, in_channels, 1, 1]
    auto weights_reshape_const = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
        ngraph::Shape{4}, ngraph::Shape{out_channels, in_channels, 1, 1});
    auto weights_reshaped = std::make_shared<ngraph::opset7::Reshape>(weights, weights_reshape_const, false);

    std::shared_ptr<ngraph::Node> conv_node = std::make_shared<ngraph::opset7::Convolution>(transpose_before, weights_reshaped,
            ngraph::Strides{1, 1}, ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0},
            ngraph::Strides{1, 1}, ngraph::op::PadType::VALID);
    conv_node->set_friendly_name(base_name + "/conv");

    // root_node tracks the last node of the matched chain; it is the one that gets replaced.
    std::shared_ptr<ngraph::Node> root_node = matmul_node;
    if (bias != nullptr) {
        conv_node = std::make_shared<ngraph::opset7::Add>(conv_node, bias);
        root_node = add;
    }

    if (fq != nullptr) {
        // Re-create the FakeQuantize on top of the convolution, keeping its original range inputs.
        conv_node = fq->clone_with_new_inputs({conv_node, fq->input_value(1), fq->input_value(2),
            fq->input_value(3), fq->input_value(4)});
        root_node = fq;
    }

    auto transpose_after = std::make_shared<ngraph::opset7::Transpose>(conv_node,
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{4},
        GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)));
    transpose_after->set_friendly_name(base_name + "/transpose_out");

    // Restore the MatMul output rank; only the two innermost dimensions are overwritten.
    auto output_shape = matmul_node->get_output_shape(0);
    output_shape[output_shape.size() - 1] = out_channels;
    output_shape[output_shape.size() - 2] = width;
    auto reshape_const_after = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
                                                                          ngraph::Shape{output_shape.size()},
                                                                          output_shape);
    auto reshape_after = std::make_shared<ngraph::opset7::Reshape>(transpose_after, reshape_const_after, false);
    reshape_after->set_friendly_name(base_name);

    ngraph::replace_node(root_node, reshape_after);
    return true;
}
|
||||||
|
|
||||||
|
// Registers a matcher for a MatMul whose second input is a Constant, either directly or
// through a FakeQuantize with constant range inputs.
ConvertMatmulToPointWiseConvolution::ConvertMatmulToPointWiseConvolution() {
    auto constant_pattern = [] {
        return ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    };

    auto const_input = constant_pattern();
    const ngraph::OutputVector fq_inputs{const_input,
                                         constant_pattern(),
                                         constant_pattern(),
                                         constant_pattern(),
                                         constant_pattern()};
    auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(fq_inputs);
    auto second_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{const_input, const_fq});
    auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(), second_input});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        // No bias and no output FakeQuantize in this variant.
        auto matched_matmul = m.get_pattern_value_map().at(matmul).get_node_shared_ptr();
        return Convert(matched_matmul, nullptr, nullptr, nullptr);
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(matmul, "ConvertMatmulToPointWiseConvolution");
    this->register_matcher(matcher, callback);
}
|
||||||
|
|
||||||
|
// Registers a matcher for MatMul -> Add(Constant), where the MatMul's second input is a
// Constant, either directly or through a FakeQuantize with constant range inputs.
ConvertMatmulWithBiasToPointWiseConvolution::ConvertMatmulWithBiasToPointWiseConvolution() {
    auto constant_pattern = [] {
        return ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    };

    auto const_input = constant_pattern();
    const ngraph::OutputVector fq_inputs{const_input,
                                         constant_pattern(),
                                         constant_pattern(),
                                         constant_pattern(),
                                         constant_pattern()};
    auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(fq_inputs);
    auto second_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{const_input, const_fq});
    auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(), second_input});
    auto bias = constant_pattern();
    auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({matmul, bias});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& matched = m.get_pattern_value_map();
        auto matmul_node = matched.at(matmul).get_node_shared_ptr();
        auto add_node = matched.at(add).get_node_shared_ptr();
        auto bias_node = matched.at(bias).get_node_shared_ptr();
        return Convert(matmul_node, add_node, bias_node, nullptr);
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(add, "ConvertMatmulWithBiasToPointWiseConvolution");
    this->register_matcher(matcher, callback);
}
|
||||||
|
|
||||||
|
// Registers a matcher for MatMul [-> Add(Constant)] -> FakeQuantize, where the MatMul's
// second input is a Constant, either directly or through a FakeQuantize with constant ranges.
ConvertMatmulWithFqToPointWiseConvolution::ConvertMatmulWithFqToPointWiseConvolution() {
    auto constant_pattern = [] {
        return ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    };

    auto const_input = constant_pattern();
    const ngraph::OutputVector weights_fq_inputs{const_input,
                                                 constant_pattern(),
                                                 constant_pattern(),
                                                 constant_pattern(),
                                                 constant_pattern()};
    auto const_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(weights_fq_inputs);
    auto second_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{const_input, const_fq});
    auto matmul = ngraph::pattern::wrap_type<ngraph::opset7::MatMul>({ngraph::pattern::any_input(), second_input});
    auto bias = constant_pattern();
    auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({matmul, bias});
    // The output FakeQuantize may sit on the Add or directly on the MatMul.
    auto matmul_out = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{add, matmul});
    const ngraph::OutputVector out_fq_inputs{matmul_out,
                                             constant_pattern(),
                                             constant_pattern(),
                                             constant_pattern(),
                                             constant_pattern()};
    auto out_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(out_fq_inputs);

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& matched = m.get_pattern_value_map();
        // Add and bias are optional: they are absent from the map when the FakeQuantize
        // was matched directly on the MatMul.
        auto node_or_null = [&matched](const std::shared_ptr<ngraph::Node>& pattern) -> std::shared_ptr<ngraph::Node> {
            auto found = matched.find(pattern);
            if (found == std::end(matched)) {
                return nullptr;
            }
            return found->second.get_node_shared_ptr();
        };
        auto add_node = node_or_null(add);
        auto bias_node = node_or_null(bias);
        return Convert(matched.at(matmul).get_node_shared_ptr(), add_node, bias_node,
            matched.at(out_fq).get_node_shared_ptr());
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(out_fq, "ConvertMatmulWithFqToPointWiseConvolution");
    this->register_matcher(matcher, callback);
}
|
@ -0,0 +1,71 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ngraph/pass/graph_rewrite.hpp>
|
||||||
|
|
||||||
|
namespace GNAPluginNS {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout
|
||||||
|
* with transposes around it:
|
||||||
|
* Transpose (NHWC -> NCHW)
|
||||||
|
* |
|
||||||
|
* Matmul Convolution in NHWC layout
|
||||||
|
* Input1: [A, B] B > 8 -------> Input: [1, 1, A, B]
|
||||||
|
* Input2: [B, C] Kernel: [C, B, 1, 1]
|
||||||
|
* Output: [A, C] Output: [1, 1, A, C]
|
||||||
|
* |
|
||||||
|
* Transpose (NCHW -> NHWC)
|
||||||
|
*/
|
||||||
|
class ConvertMatmulToPointWiseConvolution : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
ConvertMatmulToPointWiseConvolution();
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout
|
||||||
|
* with transposes around it, moved add with bias before the last transpose:
|
||||||
|
* Transpose (NHWC -> NCHW)
|
||||||
|
* |
|
||||||
|
* Matmul Convolution in NHWC layout
|
||||||
|
* Input1: [A, B] B > 8 -------> Input: [1, 1, A, B]
|
||||||
|
* Input2: [B, C] Kernel: [C, B, 1, 1]
|
||||||
|
* Output: [A, C] Output: [1, 1, A, C]
|
||||||
|
* | |
|
||||||
|
* Add (const) Add (const)
|
||||||
|
* |
|
||||||
|
* Transpose (NCHW -> NHWC)
|
||||||
|
*/
|
||||||
|
class ConvertMatmulWithBiasToPointWiseConvolution : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
ConvertMatmulWithBiasToPointWiseConvolution();
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convert a MatMul with batch size unsupported by GNA to a point-wise convolution with NHWC layout
|
||||||
|
* with transposes around it, moved add with bias and/or fake quantize before the last transpose:
|
||||||
|
* Transpose (NHWC -> NCHW)
|
||||||
|
* |
|
||||||
|
* Matmul Convolution in NHWC layout
|
||||||
|
* Input1: [A, B] B > 8 -------> Input: [1, 1, A, B]
|
||||||
|
* Input2: [B, C] Kernel: [C, B, 1, 1]
|
||||||
|
* Output: [A, C] Output: [1, 1, A, C]
|
||||||
|
* | |
|
||||||
|
* Add (const) Add (const)
|
||||||
|
* | |
|
||||||
|
* FakeQuantize FakeQuantize
|
||||||
|
* |
|
||||||
|
* Transpose (NCHW -> NHWC)
|
||||||
|
*/
|
||||||
|
class ConvertMatmulWithFqToPointWiseConvolution : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
ConvertMatmulWithFqToPointWiseConvolution();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace GNAPluginNS
|
@ -0,0 +1,131 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "transformations/split_convolution_with_large_buffer_size.hpp"
|
||||||
|
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
|
#include <ngraph/pattern/op/or.hpp>
|
||||||
|
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||||
|
|
||||||
|
#include "backend/gna_limitations.hpp"
|
||||||
|
|
||||||
|
using namespace GNAPluginNS;
|
||||||
|
|
||||||
|
NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
|
||||||
|
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
|
||||||
|
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
|
||||||
|
|
||||||
|
// Computes the sizes of the chunks the convolution input has to be cut into along its last
// (width) dimension so that each chunk, multiplied by the channel count, stays within
// GNALimitations::bufferMaxSize.
//
// conv - convolution node; input 0 must have a static shape with at least two dimensions,
//        dimension 1 being the channel count and the last dimension the width.
//
// Returns the chunk sizes in order; their sum equals the input width.
// Fails an IE_ASSERT when the channel count is zero or so large that not even one aligned
// chunk fits into the buffer (previously that case looped forever appending zero-sized chunks).
static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) {
    const auto& input_shape = conv->get_input_shape(0);
    uint32_t width = input_shape.back();
    uint32_t in_channels = input_shape.at(1);
    // Guard the division below.
    IE_ASSERT(in_channels > 0);

    uint32_t usedWidth = 0;
    std::vector<int64_t> split_sizes;
    uint32_t width_max_size = GNALimitations::bufferMaxSize / in_channels;
    // Round the chunk width down to a multiple of 64
    // (presumably a GNA alignment requirement — TODO confirm).
    width_max_size = width_max_size - width_max_size % 64;
    // A zero chunk width would never advance usedWidth in the loop below.
    IE_ASSERT(width_max_size > 0);

    while (usedWidth < width) {
        // The last chunk takes whatever is left.
        uint32_t width_part = std::min(width - usedWidth, width_max_size);
        split_sizes.push_back(width_part);
        usedWidth += width_part;
    }
    IE_ASSERT(usedWidth == width);
    return split_sizes;
}
|
||||||
|
|
||||||
|
// Splits a convolution whose input exceeds GNALimitations::bufferMaxSize into several
// convolutions over slices of the width axis and concatenates their results.
//
// conv - the matched convolution
// add  - the Add that follows the convolution, or nullptr
// bias - constant second input of that Add, or nullptr; when set, an Add with this bias is
//        appended to every partial convolution
// fq   - FakeQuantize that follows the convolution/Add, or nullptr; when set, a clone of it
//        is appended to every partial branch
//
// Returns false without touching the graph when the input already fits into the buffer,
// true after the last node of the matched chain has been replaced by the Concat.
static bool Convert(std::shared_ptr<ngraph::Node> conv,
                    std::shared_ptr<ngraph::Node> add,
                    std::shared_ptr<ngraph::Node> bias,
                    std::shared_ptr<ngraph::Node> fq) {
    const auto& input_shape = conv->get_input_shape(0);
    // Seed with size_t so the product is accumulated in size_t rather than truncated to int.
    const size_t input_size = std::accumulate(std::begin(input_shape), std::end(input_shape),
                                              size_t{1}, std::multiplies<size_t>());
    if (input_size <= GNALimitations::bufferMaxSize) {
        return false;
    }

    auto split_sizes = GetConvSplitSizes(conv);
    IE_ASSERT(split_sizes.size() > 1);

    /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
       otherwise this split axis isn't supported */
    const int64_t width_axis = input_shape.size() - 1;
    auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
        ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes));
    split_node->set_friendly_name(conv->get_friendly_name() + "/split");

    ngraph::OutputVector convOutputs;
    // The node to replace is the last one of the matched chain.
    std::shared_ptr<ngraph::Node> root_node = fq ? fq : (add ? add : conv);
    for (size_t i = 0; i < split_sizes.size(); ++i) {
        // Same convolution (same weights and attributes) applied to one slice of the input.
        std::shared_ptr<ngraph::Node> output = conv->clone_with_new_inputs({split_node->output(i), conv->input_value(1)});
        output->set_friendly_name(conv->get_friendly_name() + "_" + std::to_string(i));
        if (bias) {
            output = std::make_shared<ngraph::opset7::Add>(output, bias);
        }

        if (fq) {
            output = fq->clone_with_new_inputs({output, fq->input_value(1), fq->input_value(2),
                fq->input_value(3), fq->input_value(4)});
        }
        convOutputs.push_back(output);
    }

    auto concat = std::make_shared<ngraph::opset7::Concat>(convOutputs, width_axis);
    concat->set_friendly_name(conv->get_friendly_name());
    ngraph::replace_node(root_node, concat);
    return true;
}
|
||||||
|
|
||||||
|
// Registers a matcher for a standalone Convolution with arbitrary inputs.
SplitConvolution::SplitConvolution() {
    const ngraph::OutputVector conv_inputs{ngraph::pattern::any_input(),
                                           ngraph::pattern::any_input()};
    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>(conv_inputs);

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        // No bias and no FakeQuantize in this variant.
        auto matched_conv = m.get_pattern_value_map().at(conv).get_node_shared_ptr();
        return Convert(matched_conv, nullptr, nullptr, nullptr);
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(conv, "SplitConvolution");
    this->register_matcher(matcher, callback);
}
|
||||||
|
|
||||||
|
// Registers a matcher for Convolution -> Add(Constant).
SplitConvolutionWithBias::SplitConvolutionWithBias() {
    const ngraph::OutputVector conv_inputs{ngraph::pattern::any_input(),
                                           ngraph::pattern::any_input()};
    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>(conv_inputs);
    auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& matched = m.get_pattern_value_map();
        auto conv_node = matched.at(conv).get_node_shared_ptr();
        auto add_node = matched.at(add).get_node_shared_ptr();
        auto bias_node = matched.at(bias).get_node_shared_ptr();
        return Convert(conv_node, add_node, bias_node, nullptr);
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(add, "SplitConvolutionWithBias");
    this->register_matcher(matcher, callback);
}
|
||||||
|
|
||||||
|
// Registers a matcher for Convolution [-> Add(Constant)] -> FakeQuantize with constant ranges.
SplitConvolutionWithFq::SplitConvolutionWithFq() {
    auto constant_pattern = [] {
        return ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    };

    const ngraph::OutputVector conv_inputs{ngraph::pattern::any_input(),
                                           ngraph::pattern::any_input()};
    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>(conv_inputs);
    auto bias = constant_pattern();
    auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
    // The FakeQuantize may sit directly on the convolution or on the bias Add.
    auto conv_output = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, add});
    const ngraph::OutputVector fq_inputs{conv_output,
                                         constant_pattern(),
                                         constant_pattern(),
                                         constant_pattern(),
                                         constant_pattern()};
    auto out_fq = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>(fq_inputs);

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
        const auto& matched = m.get_pattern_value_map();
        // Add and bias are optional: they are absent from the map when the FakeQuantize
        // was matched directly on the convolution.
        auto node_or_null = [&matched](const std::shared_ptr<ngraph::Node>& pattern) -> std::shared_ptr<ngraph::Node> {
            auto found = matched.find(pattern);
            if (found == std::end(matched)) {
                return nullptr;
            }
            return found->second.get_node_shared_ptr();
        };
        auto add_node = node_or_null(add);
        auto bias_node = node_or_null(bias);
        return Convert(matched.at(conv).get_node_shared_ptr(), add_node, bias_node, matched.at(out_fq).get_node_shared_ptr());
    };

    auto matcher = std::make_shared<ngraph::pattern::Matcher>(out_fq, "SplitConvolutionWithFq");
    this->register_matcher(matcher, callback);
}
|
@ -0,0 +1,34 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <ngraph/pass/graph_rewrite.hpp>
|
||||||
|
|
||||||
|
namespace GNAPluginNS {
|
||||||
|
|
||||||
|
// @brief Splits convolution with large input buffer
|
||||||
|
class SplitConvolution : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
SplitConvolution();
|
||||||
|
};
|
||||||
|
|
||||||
|
// @brief Splits convolution with large input buffer, move add with bias to each convolution before concat
|
||||||
|
class SplitConvolutionWithBias : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
SplitConvolutionWithBias();
|
||||||
|
};
|
||||||
|
|
||||||
|
/* @brief Splits convolution with large input buffer,
|
||||||
|
* move add with bias and/or fake quantize to each convolution before concat
|
||||||
|
*/
|
||||||
|
class SplitConvolutionWithFq : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
SplitConvolutionWithFq();
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace GNAPluginNS
|
@ -312,6 +312,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo
|
|||||||
struct Subgraph {
|
struct Subgraph {
|
||||||
ngraph::ResultVector _results;
|
ngraph::ResultVector _results;
|
||||||
ngraph::ParameterVector _parameters;
|
ngraph::ParameterVector _parameters;
|
||||||
|
ngraph::SinkVector _sinks;
|
||||||
std::string _affinity;
|
std::string _affinity;
|
||||||
};
|
};
|
||||||
std::unordered_map<int, Subgraph> subgraphs;
|
std::unordered_map<int, Subgraph> subgraphs;
|
||||||
@ -325,6 +326,9 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo
|
|||||||
} else if (ngraph::op::is_parameter(node)) {
|
} else if (ngraph::op::is_parameter(node)) {
|
||||||
subgraph._parameters.emplace_back(
|
subgraph._parameters.emplace_back(
|
||||||
std::dynamic_pointer_cast<ngraph::op::v0::Parameter>(node->shared_from_this()));
|
std::dynamic_pointer_cast<ngraph::op::v0::Parameter>(node->shared_from_this()));
|
||||||
|
} else if (ngraph::op::is_sink(node)) {
|
||||||
|
subgraph._sinks.emplace_back(
|
||||||
|
std::dynamic_pointer_cast<ngraph::op::Sink>(node->shared_from_this()));
|
||||||
}
|
}
|
||||||
auto itAffinity = affinities.find(node);
|
auto itAffinity = affinities.find(node);
|
||||||
if (itAffinity != affinities.end()) {
|
if (itAffinity != affinities.end()) {
|
||||||
@ -373,7 +377,7 @@ HeteroExecutableNetwork::HeteroExecutableNetwork(const InferenceEngine::CNNNetwo
|
|||||||
for (auto&& subgraph : orderedSubgraphs) {
|
for (auto&& subgraph : orderedSubgraphs) {
|
||||||
_networks[id]._device = subgraph._affinity;
|
_networks[id]._device = subgraph._affinity;
|
||||||
subFunctions[id] =
|
subFunctions[id] =
|
||||||
std::make_shared<ngraph::Function>(subgraph._results, subgraph._parameters,
|
std::make_shared<ngraph::Function>(subgraph._results, subgraph._sinks, subgraph._parameters,
|
||||||
_name + '_' + std::to_string(id));
|
_name + '_' + std::to_string(id));
|
||||||
_networks[id]._clonedNetwork = CNNNetwork{subFunctions[id]};
|
_networks[id]._clonedNetwork = CNNNetwork{subFunctions[id]};
|
||||||
// update of pre-processing info
|
// update of pre-processing info
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <transformations_visibility.hpp>
|
||||||
|
#include <ngraph/pass/graph_rewrite.hpp>
|
||||||
|
|
||||||
|
namespace ngraph {
|
||||||
|
namespace pass {
|
||||||
|
|
||||||
|
class TRANSFORMATIONS_API GatherNegativeConstIndicesNormalize;
|
||||||
|
|
||||||
|
} // namespace pass
|
||||||
|
} // namespace ngraph
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @ingroup ie_transformation_common_api
|
||||||
|
* @brief GatherNegativeConstIndicesNormalize checks if indices value is negative scalar and
|
||||||
|
* normalizes it using ShapeOf->Add->Cast subgraph.
|
||||||
|
* We need to remove this transformation after adding support of negative indices in
|
||||||
|
* future version of Gather operation.
|
||||||
|
*/
|
||||||
|
class ngraph::pass::GatherNegativeConstIndicesNormalize : public ngraph::pass::MatcherPass {
|
||||||
|
public:
|
||||||
|
NGRAPH_RTTI_DECLARATION;
|
||||||
|
GatherNegativeConstIndicesNormalize();
|
||||||
|
};
|
@ -70,6 +70,7 @@
|
|||||||
#include "transformations/op_conversions/log_softmax_decomposition.hpp"
|
#include "transformations/op_conversions/log_softmax_decomposition.hpp"
|
||||||
#include "transformations/op_conversions/mvn6_decomposition.hpp"
|
#include "transformations/op_conversions/mvn6_decomposition.hpp"
|
||||||
#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp"
|
#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp"
|
||||||
|
#include "transformations/op_conversions/gather_normalize_negative_indices.hpp"
|
||||||
|
|
||||||
#include <ngraph/pass/manager.hpp>
|
#include <ngraph/pass/manager.hpp>
|
||||||
#include <ngraph/pass/constant_folding.hpp>
|
#include <ngraph/pass/constant_folding.hpp>
|
||||||
@ -157,6 +158,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
|
|||||||
decomp->add_matcher<ngraph::pass::MVN6Decomposition>();
|
decomp->add_matcher<ngraph::pass::MVN6Decomposition>();
|
||||||
decomp->add_matcher<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
|
decomp->add_matcher<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
|
||||||
decomp->add_matcher<ngraph::pass::EinsumDecomposition>();
|
decomp->add_matcher<ngraph::pass::EinsumDecomposition>();
|
||||||
|
decomp->add_matcher<ngraph::pass::GatherNegativeConstIndicesNormalize>();
|
||||||
decomp->set_name("ngraph::pass::CommonDecompositions");
|
decomp->set_name("ngraph::pass::CommonDecompositions");
|
||||||
|
|
||||||
// CF is required after all decompositions
|
// CF is required after all decompositions
|
||||||
|
@ -0,0 +1,77 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "transformations/op_conversions/gather_normalize_negative_indices.hpp"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
|
#include <ngraph/rt_info.hpp>
|
||||||
|
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||||
|
#include "itt.hpp"
|
||||||
|
|
||||||
|
NGRAPH_RTTI_DEFINITION(ngraph::pass::GatherNegativeConstIndicesNormalize, "GatherNegativeConstIndicesNormalize", 0);
|
||||||
|
|
||||||
|
// Registers a matcher for Gather(data, Constant indices, Constant axis) where data has a
// static rank. When the indices constant holds exactly one negative value, it is replaced by
//   Gather(ShapeOf(data), axis, 0) + indices
// i.e. the size of the gathered dimension plus the negative index.
// The callback returns false (no change) when the indices are not a single negative value,
// the axis is not a single value or is out of range, or the gathered dimension is dynamic.
ngraph::pass::GatherNegativeConstIndicesNormalize::GatherNegativeConstIndicesNormalize() {
    MATCHER_SCOPE(GatherNegativeConstIndicesNormalize);
    auto data_input = ngraph::pattern::any_input(pattern::has_static_rank());
    auto axis_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    auto indices_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    auto gather_node = std::make_shared<ngraph::opset7::Gather>(data_input, indices_input, axis_input);

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        auto& pattern_to_output = m.get_pattern_value_map();
        auto gather = std::dynamic_pointer_cast<ngraph::opset7::Gather>(pattern_to_output.at(gather_node).get_node_shared_ptr());
        auto data = pattern_to_output.at(data_input);
        auto axis_constant = std::dynamic_pointer_cast<ngraph::opset7::Constant>(pattern_to_output.at(axis_input).get_node_shared_ptr());
        auto indices_constant = std::dynamic_pointer_cast<ngraph::opset7::Constant>(pattern_to_output.at(indices_input).get_node_shared_ptr());

        if (!gather || !axis_constant || !indices_constant) {
            return false;
        }

        // Only a single negative index is handled.
        auto indices = indices_constant->cast_vector<int64_t>();
        if (indices.size() != 1 || indices[0] >= 0) {
            return false;
        }

        auto axis = axis_constant->cast_vector<int64_t>();
        if (axis.size() != 1) {
            return false;
        }

        auto axis_value = axis[0];
        // The rank is static: the data pattern is created with has_static_rank().
        const auto data_rank = data.get_partial_shape().rank().get_length();

        // normalize `axis` value if it is negative
        if (axis_value < 0) {
            axis_value = axis_value + data_rank;
        }

        // Valid axes are [0, rank); anything else would index the shape out of range below.
        if (axis_value < 0 || axis_value >= data_rank) {
            return false;
        }

        // check `axis` dimension of data tensor is static
        if (!data.get_partial_shape()[axis_value].is_static()) {
            return false;
        }

        auto input_type = indices_constant->get_element_type();
        auto shape_of = std::make_shared<ngraph::opset7::ShapeOf>(data, input_type);
        // Size of the gathered dimension: element `axis_value` of the shape vector.
        auto input_gather = std::make_shared<ngraph::opset7::Gather>(shape_of,
            ngraph::opset7::Constant::create(input_type, Shape{}, {axis_value}), ngraph::opset7::Constant::create(input_type, Shape{}, {0}));

        auto add = std::make_shared<ngraph::opset7::Add>(input_gather, indices_constant);
        // Copy the matched Gather (not the pattern node) so that the attributes of the
        // original operation are carried over to the replacement.
        auto gather_new = gather->copy_with_new_inputs({data, add, axis_constant});
        gather_new->set_friendly_name(gather->get_friendly_name());

        ngraph::copy_runtime_info(gather, {shape_of, input_gather, add, gather_new});
        ngraph::replace_node(gather, gather_new);

        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(gather_node, matcher_name);
    register_matcher(m, callback);
}
|
@ -92,7 +92,7 @@ bool checkGrowingOutput(const Model& model) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const float SCALE_THRESHOLD = 0.125f;
|
static const float SCALE_THRESHOLD = 0.1f;
|
||||||
|
|
||||||
for (const auto& stage : model->getStages()) {
|
for (const auto& stage : model->getStages()) {
|
||||||
if (stage->type() != StageType::Power &&
|
if (stage->type() != StageType::Power &&
|
||||||
@ -248,14 +248,13 @@ void PassImpl::run(const Model& model) {
|
|||||||
if (firstStage && shift < 4 && isGrowingOutput && weights->desc().dim(Dim::C) > 1) {
|
if (firstStage && shift < 4 && isGrowingOutput && weights->desc().dim(Dim::C) > 1) {
|
||||||
normalVal = 5;
|
normalVal = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
shift = correctShift(shift, firstStage, stage->origLayer()->type);
|
shift = correctShift(shift, firstStage, stage->origLayer()->type);
|
||||||
shift -= normalVal;
|
shift -= normalVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
firstStage = false;
|
firstStage = false;
|
||||||
scale = 1;
|
scale = 1;
|
||||||
if (shift > scaleThreshold) {
|
if (shift >= scaleThreshold) {
|
||||||
scale = static_cast<float>(1ULL << static_cast<std::uint32_t>(shift));
|
scale = static_cast<float>(1ULL << static_cast<std::uint32_t>(shift));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,306 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <ngraph/function.hpp>
|
||||||
|
#include <ngraph/opsets/opset7.hpp>
|
||||||
|
#include <ngraph/pass/manager.hpp>
|
||||||
|
#include <transformations/op_conversions/gather_normalize_negative_indices.hpp>
|
||||||
|
#include <transformations/init_node_info.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||||
|
|
||||||
|
using namespace testing;
|
||||||
|
|
||||||
|
// Fully static input, index -1 along axis 1: after the pass the graph is
// expected to match a reference in which the index is offset by the axis
// dimension taken from ShapeOf(data).
TEST(TransformationTests, GatherNegativeIndicesNormalize) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, transformed in place by the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{1, 15, 128});
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: ShapeOf -> Gather(axis dim) -> Add(index) -> Gather.
    {
        const auto idx_type = ngraph::element::i32;

        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{1, 15, 128});
        auto neg_index = ops::Constant::create(idx_type, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});

        auto data_shape = std::make_shared<ops::ShapeOf>(input, idx_type);
        auto axis_dim = std::make_shared<ops::Gather>(
            data_shape,
            ops::Constant::create(idx_type, ngraph::Shape{}, {1}),
            ops::Constant::create(idx_type, ngraph::Shape{}, {0}));
        auto shifted_index = std::make_shared<ops::Add>(axis_dim, neg_index);
        auto gather_op = std::make_shared<ops::Gather>(input, shifted_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// Same as the basic case but the Gather axis is given as -2; the reference
// graph reads the axis dimension at position 1 of ShapeOf(data) while the
// final Gather keeps the original (negative) axis constant.
TEST(TransformationTests, GatherNegativeIndicesNormalize_neg_axis) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, transformed in place by the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{1, 15, 128});
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: ShapeOf -> Gather(axis dim) -> Add(index) -> Gather.
    {
        const auto idx_type = ngraph::element::i32;

        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{1, 15, 128});
        auto neg_index = ops::Constant::create(idx_type, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2});

        auto data_shape = std::make_shared<ops::ShapeOf>(input, idx_type);
        auto axis_dim = std::make_shared<ops::Gather>(
            data_shape,
            ops::Constant::create(idx_type, ngraph::Shape{}, {1}),
            ops::Constant::create(idx_type, ngraph::Shape{}, {0}));
        auto shifted_index = std::make_shared<ops::Add>(axis_dim, neg_index);
        auto gather_op = std::make_shared<ops::Gather>(input, shifted_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// Indices are i32 while the axis constant is i64: the reference graph builds
// the ShapeOf/Gather/Add chain in the indices type and leaves the i64 axis
// constant on the final Gather untouched.
TEST(TransformationTests, GatherNegativeIndicesNormalize_dif_input_types) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, transformed in place by the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{1, 15, 128});
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: ShapeOf -> Gather(axis dim) -> Add(index) -> Gather.
    {
        const auto idx_type = ngraph::element::i32;

        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{1, 15, 128});
        auto neg_index = ops::Constant::create(idx_type, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});

        auto data_shape = std::make_shared<ops::ShapeOf>(input, idx_type);
        auto axis_dim = std::make_shared<ops::Gather>(
            data_shape,
            ops::Constant::create(idx_type, ngraph::Shape{}, {1}),
            ops::Constant::create(idx_type, ngraph::Shape{}, {0}));
        auto shifted_index = std::make_shared<ops::Add>(axis_dim, neg_index);
        auto gather_op = std::make_shared<ops::Gather>(input, shifted_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// Partially dynamic input {DYN, 15, DYN}: only the gathered axis (1) has a
// static dimension, and the reference graph still contains the
// ShapeOf/Gather/Add normalization chain.
TEST(TransformationTests, GatherNegativeIndicesNormalize_static_axis_dim) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, transformed in place by the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN});
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: ShapeOf -> Gather(axis dim) -> Add(index) -> Gather.
    {
        const auto idx_type = ngraph::element::i32;

        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN});
        auto neg_index = ops::Constant::create(idx_type, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});

        auto data_shape = std::make_shared<ops::ShapeOf>(input, idx_type);
        auto axis_dim = std::make_shared<ops::Gather>(
            data_shape,
            ops::Constant::create(idx_type, ngraph::Shape{}, {1}),
            ops::Constant::create(idx_type, ngraph::Shape{}, {0}));
        auto shifted_index = std::make_shared<ops::Add>(axis_dim, neg_index);
        auto gather_op = std::make_shared<ops::Gather>(input, shifted_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// Partially dynamic input {DYN, 15, DYN} combined with a negative axis (-2):
// the reference graph reads dimension 1 of ShapeOf(data) and keeps the
// original axis constant on the final Gather.
TEST(TransformationTests, GatherNegativeIndicesNormalize_static_axis_dim_neg_axis) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, transformed in place by the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN});
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: ShapeOf -> Gather(axis dim) -> Add(index) -> Gather.
    {
        const auto idx_type = ngraph::element::i32;

        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape{DYN, 15, DYN});
        auto neg_index = ops::Constant::create(idx_type, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-2});

        auto data_shape = std::make_shared<ops::ShapeOf>(input, idx_type);
        auto axis_dim = std::make_shared<ops::Gather>(
            data_shape,
            ops::Constant::create(idx_type, ngraph::Shape{}, {1}),
            ops::Constant::create(idx_type, ngraph::Shape{}, {0}));
        auto shifted_index = std::make_shared<ops::Add>(axis_dim, neg_index);
        auto gather_op = std::make_shared<ops::Gather>(input, shifted_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// Every dimension of the input is dynamic, including the gathered axis: the
// reference graph is the plain Gather, i.e. no normalization chain is expected.
TEST(TransformationTests, GatherNegativeIndicesNormalize_non_static_axis_dim) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, run through the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape{DYN, DYN, DYN});
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: an untouched Gather over the same inputs.
    {
        const auto idx_type = ngraph::element::i32;

        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape{DYN, DYN, DYN});
        auto neg_index = ops::Constant::create(idx_type, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// The constant index is positive (1): the reference graph is the plain
// Gather, i.e. no normalization chain is expected.
TEST(TransformationTests, GatherNegativeIndicesNormalize_positive_ind) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, run through the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3});
        auto pos_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0});
        auto gather_op = std::make_shared<ops::Gather>(input, pos_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: an untouched Gather over the same inputs.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::Shape{2, 3});
        auto pos_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0});
        auto gather_op = std::make_shared<ops::Gather>(input, pos_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
||||||
|
|
||||||
|
// The input has a dynamic rank: the reference graph is the plain Gather,
// i.e. no normalization chain is expected.
TEST(TransformationTests, GatherNegativeIndicesNormalize_non_static_rank) {
    namespace ops = ngraph::opset7;
    std::shared_ptr<ngraph::Function> f, f_ref;

    // Graph under test, run through the pass manager.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32,
                                                      ngraph::PartialShape::dynamic(ngraph::Rank::dynamic()));
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis, 0);

        f = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});

        ngraph::pass::Manager pass_manager;
        pass_manager.register_pass<ngraph::pass::InitNodeInfo>();
        pass_manager.register_pass<ngraph::pass::GatherNegativeConstIndicesNormalize>();
        pass_manager.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    // Reference graph: an untouched Gather over the same inputs.
    {
        auto input = std::make_shared<ops::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
        auto neg_index = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {-1});
        auto gather_axis = ops::Constant::create(ngraph::element::i32, ngraph::Shape{}, {0});
        auto gather_op = std::make_shared<ops::Gather>(input, neg_index, gather_axis);

        f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{gather_op}, ngraph::ParameterVector{input});
    }

    const auto cmp = compare_functions(f, f_ref);
    ASSERT_TRUE(cmp.first) << cmp.second;
}
|
@ -38,7 +38,7 @@ INSTANTIATE_TEST_CASE_P(smoke_MemoryTest, MemoryTest,
|
|||||||
::testing::ValuesIn(iterationCount),
|
::testing::ValuesIn(iterationCount),
|
||||||
::testing::ValuesIn(inShapes),
|
::testing::ValuesIn(inShapes),
|
||||||
::testing::ValuesIn(inputPrecisions),
|
::testing::ValuesIn(inputPrecisions),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
::testing::Values(CommonTestUtils::DEVICE_CPU, "HETERO:CPU")),
|
||||||
MemoryTest::getTestCaseName);
|
MemoryTest::getTestCaseName);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -0,0 +1,230 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/common_utils.hpp"
|
||||||
|
#include "functional_test_utils/plugin_cache.hpp"
|
||||||
|
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||||
|
#include "functional_test_utils/blob_utils.hpp"
|
||||||
|
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||||
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
|
||||||
|
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
InferenceEngine::Precision, // Network Precision
|
||||||
|
std::string, // Target Device
|
||||||
|
std::map<std::string, std::string>, // Configuration
|
||||||
|
std::vector<size_t> // Input Shape
|
||||||
|
> convertMatmulToPointwiseConvParams;
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
InferenceEngine::Precision, // Network Precision
|
||||||
|
std::string, // Target Device
|
||||||
|
std::map<std::string, std::string>, // Configuration
|
||||||
|
std::vector<size_t>, // Input Shape
|
||||||
|
std::pair<float, float> // Input Min and Max
|
||||||
|
> convertMatmulToPointwiseConvWithFqParams;
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
// Functional test fixture whose network is
//   Parameter -> MatMul(constant weights, transposed) -> Reshape -> Relu.
class ConvertMatmulToPointwiseConv : public testing::WithParamInterface<convertMatmulToPointwiseConvParams>,
                                     public LayerTestsUtils::LayerTestsCommon {
public:
    // Builds the human-readable test-case name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<convertMatmulToPointwiseConvParams> obj) {
        InferenceEngine::Precision precision = std::get<0>(obj.param);
        std::string device = std::get<1>(obj.param);
        std::map<std::string, std::string> config = std::get<2>(obj.param);
        std::vector<size_t> shape = std::get<3>(obj.param);

        std::ostringstream name;
        name << "netPRC=" << precision.name() << "_"
             << "targetDevice=" << device << "_";
        for (const auto& entry : config) {
            name << "_configItem=" << entry.first << "_" << entry.second;
        }
        name << "_inputShape=" << CommonTestUtils::vec2str(shape);
        return name.str();
    }

    // Allocates the input blob and fills it with generated floats in [-0.2, 0.2].
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
        InferenceEngine::Blob::Ptr inputBlob = make_blob_with_precision(info.getTensorDesc());
        inputBlob->allocate();

        float* dst = inputBlob->buffer().as<float*>();
        const std::vector<float> generated = CommonTestUtils::generate_float_numbers(inputBlob->size(), -0.2f, 0.2f);
        const size_t count = inputBlob->size();
        for (size_t idx = 0; idx < count; ++idx) {
            dst[idx] = generated[idx];
        }
        return inputBlob;
    }

protected:
    // Unpacks the test parameters and assembles the ngraph function under test.
    void SetUp() override {
        InferenceEngine::Precision precision;
        std::vector<size_t> shape;
        std::tie(precision, targetDevice, configuration, shape) = this->GetParam();
        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision);

        auto params = ngraph::builder::makeParams(ngPrc, {shape});

        // The last two dimensions of the input drive the bias and weights shapes.
        const size_t rank = shape.size();
        const size_t batch = shape[rank - 2];
        const size_t elemNum = shape[rank - 1];

        // Square elemNum x elemNum weights; MatMul is created with the second input transposed.
        std::vector<float> weightValues = CommonTestUtils::generate_float_numbers(elemNum * elemNum, -0.1f, 0.1f);
        auto weightsConst =
            std::make_shared<ngraph::opset7::Constant>(ngPrc, ngraph::Shape{elemNum, elemNum}, weightValues);
        auto matmul = ngraph::builder::makeMatMul(params[0], weightsConst, false, true);

        // NOTE(review): `biasAdd` is built but never consumed - the Reshape below
        // takes `matmul` directly, so the bias addition is not part of the tested
        // function. Confirm whether that is intentional.
        auto biasConst =
            ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{1, batch, 1}, std::vector<float>{1.0f});
        auto biasAdd = ngraph::builder::makeEltwise(matmul, biasConst, ngraph::helpers::EltwiseTypes::ADD);

        // Reshape back to the input shape, then apply Relu.
        auto targetShape = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
                                                                      ngraph::Shape{ shape.size() },
                                                                      shape);
        auto reshape = std::make_shared<ngraph::opset7::Reshape>(matmul, targetShape, false);
        auto relu = std::make_shared<ngraph::opset7::Relu>(reshape);

        ngraph::ResultVector outputs{ std::make_shared<ngraph::opset7::Result>(relu) };
        function = std::make_shared<ngraph::Function>(outputs, params, "ConvertMatmulToPointwiseConv");
    }
};
|
||||||
|
|
||||||
|
// Functional test fixture whose network is
//   Parameter -> FakeQuantize -> MatMul(FakeQuantize(weights), transposed)
//             -> Add(bias) -> FakeQuantize -> Reshape -> Relu.
class ConvertMatmulToPointwiseConvWithFq : public testing::WithParamInterface<convertMatmulToPointwiseConvWithFqParams>,
                                           public LayerTestsUtils::LayerTestsCommon {
    // Input generation range; min/max are overwritten from the test parameters in SetUp().
    float inputDataMin = -10.0f;
    float inputDataMax = 10.0f;
    float inputDataResolution = 1.0f;

public:
    // Builds the human-readable test-case name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<convertMatmulToPointwiseConvWithFqParams> obj) {
        InferenceEngine::Precision precision = std::get<0>(obj.param);
        std::string device = std::get<1>(obj.param);
        std::map<std::string, std::string> config = std::get<2>(obj.param);
        std::vector<size_t> shape = std::get<3>(obj.param);
        std::pair<float, float> dataRange = std::get<4>(obj.param);

        std::ostringstream name;
        name << "netPRC=" << precision.name() << "_"
             << "targetDevice=" << device << "_";
        for (const auto& entry : config) {
            name << "_configItem=" << entry.first << "_" << entry.second;
        }
        name << "_inputShape=" << CommonTestUtils::vec2str(shape);
        name << "_inputMinMax=(" << dataRange.first << ".." << dataRange.second << ")";
        return name.str();
    }

    // Creates the input blob via createAndFillBlob using the configured range,
    // start value and resolution.
    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
        const auto range = inputDataMax - inputDataMin;
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), range, inputDataMin,
                                                1 / inputDataResolution);
    }

protected:
    // Unpacks the test parameters and assembles the ngraph function under test.
    void SetUp() override {
        InferenceEngine::Precision precision;
        std::vector<size_t> shape;
        std::pair<float, float> dataRange;
        std::tie(precision, targetDevice, configuration, shape, dataRange) = this->GetParam();
        inputDataMin = dataRange.first;
        inputDataMax = dataRange.second;
        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision);

        auto params = ngraph::builder::makeParams(ngPrc, {shape});

        // FakeQuantize on the input; the same low/high constants serve as input and output ranges.
        auto inLow = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{ 1 },
                                                   std::vector<float>{ inputDataMin });
        auto inHigh = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{ 1 },
                                                    std::vector<float>{ inputDataMax });
        auto inputFQ = std::make_shared<ngraph::opset7::FakeQuantize>(params[0],
                                                                      inLow, inHigh, inLow, inHigh, UINT16_MAX);

        const size_t elemNum = shape[shape.size() - 1];

        // Square elemNum x elemNum weights wrapped in their own FakeQuantize.
        const float weightsMin = -0.2f;
        const float weightsMax = 0.2f;
        std::vector<float> weightValues =
            CommonTestUtils::generate_float_numbers(elemNum * elemNum, weightsMin, weightsMax);
        auto weightsConst =
            std::make_shared<ngraph::opset7::Constant>(ngPrc, ngraph::Shape{elemNum, elemNum}, weightValues);
        auto wLow = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{ 1 },
                                                  std::vector<float>{ weightsMin });
        auto wHigh = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{ 1 },
                                                   std::vector<float>{ weightsMax });
        auto weightsFQ = std::make_shared<ngraph::opset7::FakeQuantize>(weightsConst,
                                                                        wLow, wHigh, wLow, wHigh, UINT16_MAX);
        auto matmul = ngraph::builder::makeMatMul(inputFQ, weightsFQ, false, true);

        auto biasConst =
            ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{1, 1, 1}, std::vector<float>{1.0f});
        auto biasAdd = ngraph::builder::makeEltwise(matmul, biasConst, ngraph::helpers::EltwiseTypes::ADD);

        // Output FakeQuantize range is +/- (inputDataMax * weightsMax * elemNum).
        auto outLow = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{ 1 },
                                                    std::vector<float>{ -inputDataMax * weightsMax * elemNum });
        auto outHigh = ngraph::builder::makeConstant(ngPrc, std::vector<size_t>{ 1 },
                                                     std::vector<float>{ inputDataMax * weightsMax * elemNum });
        auto outputFQ = std::make_shared<ngraph::opset7::FakeQuantize>(biasAdd,
                                                                       outLow, outHigh, outLow, outHigh, UINT16_MAX);

        // Reshape back to the input shape, then apply Relu.
        auto targetShape = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
                                                                      ngraph::Shape{ shape.size() },
                                                                      shape);
        auto reshape = std::make_shared<ngraph::opset7::Reshape>(outputFQ, targetShape, false);
        auto relu = std::make_shared<ngraph::opset7::Relu>(reshape);

        ngraph::ResultVector outputs{ std::make_shared<ngraph::opset7::Result>(relu) };
        function = std::make_shared<ngraph::Function>(outputs, params, "ConvertMatmulToPointwiseConv");
    }
};
|
||||||
|
|
||||||
|
// Parameterized test body: only calls Run(), which this fixture does not
// define itself (presumably inherited from LayerTestsUtils::LayerTestsCommon).
TEST_P(ConvertMatmulToPointwiseConv, CompareWithRefImpl) {
    Run();
}
|
||||||
|
|
||||||
|
// Parameterized test body: only calls Run(), which this fixture does not
// define itself (presumably inherited from LayerTestsUtils::LayerTestsCommon).
TEST_P(ConvertMatmulToPointwiseConvWithFq, CompareWithRefImpl) {
    Run();
}
|
||||||
|
|
||||||
|
// Network precisions the tests are instantiated for.
const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

// Plugin configurations; a single one selecting GNA_SW_EXACT device mode.
const std::vector<std::map<std::string, std::string>> configs = {
    {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}}
};

// Input shapes fed to the fixtures.
const std::vector<std::vector<size_t>> inputShape = {
    {1, 64, 64},
    {1, 256, 128},
    {1, 512, 128}
};

// (min, max) input ranges for the FakeQuantize variant.
const std::vector<std::pair<float, float>> fqStats = {
    {-0.5, 0.5}
};
|
||||||
|
|
||||||
|
// Instantiates the plain fixture over every precision/config/shape
// combination on the GNA device.
INSTANTIATE_TEST_CASE_P(
    smoke_ConvertMatmulToPointwiseConvTest,
    ConvertMatmulToPointwiseConv,
    ::testing::Combine(::testing::ValuesIn(netPrecisions),
                       ::testing::Values(CommonTestUtils::DEVICE_GNA),
                       ::testing::ValuesIn(configs),
                       ::testing::ValuesIn(inputShape)),
    ConvertMatmulToPointwiseConv::getTestCaseName);
|
||||||
|
|
||||||
|
// Issue 55662
|
||||||
|
INSTANTIATE_TEST_CASE_P(DISABLED_smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq,
|
||||||
|
::testing::Combine(
|
||||||
|
::testing::ValuesIn(netPrecisions),
|
||||||
|
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||||
|
::testing::ValuesIn(configs),
|
||||||
|
::testing::ValuesIn(inputShape),
|
||||||
|
::testing::ValuesIn(fqStats)),
|
||||||
|
ConvertMatmulToPointwiseConvWithFq::getTestCaseName);
|
||||||
|
|
||||||
|
} // namespace LayerTestsDefinitions
|
@ -18,19 +18,21 @@
|
|||||||
typedef std::tuple<
|
typedef std::tuple<
|
||||||
InferenceEngine::Precision, // Network Precision
|
InferenceEngine::Precision, // Network Precision
|
||||||
std::string, // Target Device
|
std::string, // Target Device
|
||||||
std::map<std::string, std::string> //Configuration
|
std::map<std::string, std::string>, // Configuration
|
||||||
|
std::vector<size_t> // Input Shape
|
||||||
> EltwiseSplitOverChannelsPassParams;
|
> EltwiseSplitOverChannelsPassParams;
|
||||||
|
|
||||||
namespace LayerTestsDefinitions {
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
class EltwiseSplitOverChannelsPassTest : public testing::WithParamInterface<EltwiseSplitOverChannelsPassParams>,
|
class EltwiseSplitOverChannelsPassTest : public testing::WithParamInterface<EltwiseSplitOverChannelsPassParams>,
|
||||||
public LayerTestsUtils::LayerTestsCommon {
|
public LayerTestsUtils::LayerTestsCommon {
|
||||||
public:
|
public:
|
||||||
static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitOverChannelsPassParams> obj) {
|
static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitOverChannelsPassParams> obj) {
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::string targetDevice;
|
std::string targetDevice;
|
||||||
std::map<std::string, std::string> configuration;
|
std::map<std::string, std::string> configuration;
|
||||||
std::tie(netPrecision, targetDevice, configuration) = obj.param;
|
std::vector<size_t> inputShape;
|
||||||
|
std::tie(netPrecision, targetDevice, configuration, inputShape) = obj.param;
|
||||||
|
|
||||||
std::ostringstream result;
|
std::ostringstream result;
|
||||||
result << "netPRC=" << netPrecision.name() << "_";
|
result << "netPRC=" << netPrecision.name() << "_";
|
||||||
@ -38,20 +40,22 @@ public:
|
|||||||
for (auto const& configItem : configuration) {
|
for (auto const& configItem : configuration) {
|
||||||
result << "_configItem=" << configItem.first << "_" << configItem.second;
|
result << "_configItem=" << configItem.first << "_" << configItem.second;
|
||||||
}
|
}
|
||||||
|
result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
|
||||||
return result.str();
|
return result.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
InferenceEngine::Precision netPrecision;
|
InferenceEngine::Precision netPrecision;
|
||||||
std::tie(netPrecision, targetDevice, configuration) = this->GetParam();
|
std::vector<size_t> inputShape;
|
||||||
|
std::tie(netPrecision, targetDevice, configuration, inputShape) = this->GetParam();
|
||||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||||
|
|
||||||
auto params = ngraph::builder::makeParams(ngPrc, { {1, 67000} });
|
auto params = ngraph::builder::makeParams(ngPrc, { inputShape });
|
||||||
auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, {1, 67000}, {-1.0f});
|
auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, inputShape, {-1.0f});
|
||||||
|
|
||||||
auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY);
|
||||||
function = std::make_shared<ngraph::Function>(sum, params, "RemovePermutationPass");
|
function = std::make_shared<ngraph::Function>(sum, params, "EltwiseSplitOverChannelsPassTest");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -71,11 +75,17 @@ const std::vector<std::map<std::string, std::string>> configs = {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const std::vector<std::vector<size_t>> inputShape = {
|
||||||
|
{1, 67000},
|
||||||
|
{1, 500000}
|
||||||
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest,
|
INSTANTIATE_TEST_CASE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest,
|
||||||
::testing::Combine(
|
::testing::Combine(
|
||||||
::testing::ValuesIn(netPrecisions),
|
::testing::ValuesIn(netPrecisions),
|
||||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||||
::testing::ValuesIn(configs)),
|
::testing::ValuesIn(configs),
|
||||||
|
::testing::ValuesIn(inputShape)),
|
||||||
EltwiseSplitOverChannelsPassTest::getTestCaseName);
|
EltwiseSplitOverChannelsPassTest::getTestCaseName);
|
||||||
|
|
||||||
} // namespace LayerTestsDefinitions
|
} // namespace LayerTestsDefinitions
|
||||||
|
@ -60,8 +60,6 @@ std::vector<std::string> disabledTestPatterns() {
|
|||||||
R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
|
R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
|
||||||
// TODO: Issue 51528
|
// TODO: Issue 51528
|
||||||
R"(.*CachingSupport.*_(u8|i16)_.*)",
|
R"(.*CachingSupport.*_(u8|i16)_.*)",
|
||||||
// TODO: Issue 51527
|
|
||||||
R"(.*CachingSupport.*_batch2_.*)",
|
|
||||||
// TODO: Issue 51525
|
// TODO: Issue 51525
|
||||||
R"(.*CachingSupport.*KSOFunction.*)",
|
R"(.*CachingSupport.*KSOFunction.*)",
|
||||||
// TODO: Issue 57363 (Param -> Result subgraphs)
|
// TODO: Issue 57363 (Param -> Result subgraphs)
|
||||||
|
@ -69,13 +69,16 @@ void ImportNetworkTestBase::Run() {
|
|||||||
|
|
||||||
for (const auto& next_input : importedExecNetwork.GetInputsInfo()) {
|
for (const auto& next_input : importedExecNetwork.GetInputsInfo()) {
|
||||||
ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]);
|
ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]);
|
||||||
|
Compare(next_input.second->getTensorDesc(), compiledExecNetwork.GetInputsInfo()[next_input.first]->getTensorDesc());
|
||||||
}
|
}
|
||||||
for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) {
|
for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) {
|
||||||
ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]);
|
ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]);
|
||||||
}
|
}
|
||||||
auto importedOutputs = GetOutputs();
|
auto importedOutputs = GetOutputs();
|
||||||
ASSERT_EQ(actualOutputs.size(), importedOutputs.size());
|
ASSERT_EQ(actualOutputs.size(), importedOutputs.size());
|
||||||
|
|
||||||
for (size_t i = 0; i < actualOutputs.size(); i++) {
|
for (size_t i = 0; i < actualOutputs.size(); i++) {
|
||||||
|
Compare(actualOutputs[i]->getTensorDesc(), importedOutputs[i]->getTensorDesc());
|
||||||
Compare(actualOutputs[i], importedOutputs[i]);
|
Compare(actualOutputs[i], importedOutputs[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -72,6 +72,8 @@ public:
|
|||||||
|
|
||||||
virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual);
|
virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual);
|
||||||
|
|
||||||
|
virtual void Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc);
|
||||||
|
|
||||||
virtual void SetRefMode(RefMode mode);
|
virtual void SetRefMode(RefMode mode);
|
||||||
|
|
||||||
std::shared_ptr<ngraph::Function> GetFunction();
|
std::shared_ptr<ngraph::Function> GetFunction();
|
||||||
|
@ -274,6 +274,17 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LayerTestsCommon::Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc) {
|
||||||
|
auto expectedDims = actualDesc.getDims();
|
||||||
|
auto actualDims = expectedDesc.getDims();
|
||||||
|
ASSERT_EQ(actualDims.size(), expectedDims.size());
|
||||||
|
for (size_t j = 0; j < actualDims.size(); ++j) {
|
||||||
|
ASSERT_EQ(actualDims.at(j), expectedDims.at(j));
|
||||||
|
}
|
||||||
|
ASSERT_EQ(actualDesc.getLayout(), expectedDesc.getLayout());
|
||||||
|
ASSERT_EQ(actualDesc.getPrecision(), expectedDesc.getPrecision());
|
||||||
|
}
|
||||||
|
|
||||||
void LayerTestsCommon::ConfigureNetwork() {
|
void LayerTestsCommon::ConfigureNetwork() {
|
||||||
for (const auto &in : cnnNetwork.getInputsInfo()) {
|
for (const auto &in : cnnNetwork.getInputsInfo()) {
|
||||||
if (inLayout != InferenceEngine::Layout::ANY) {
|
if (inLayout != InferenceEngine::Layout::ANY) {
|
||||||
|
@ -1176,9 +1176,6 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
|||||||
size_t opt_deconv_layers_b_fs_zyx_fsv16 = 0;
|
size_t opt_deconv_layers_b_fs_zyx_fsv16 = 0;
|
||||||
size_t total_crop_layers = 0;
|
size_t total_crop_layers = 0;
|
||||||
|
|
||||||
size_t weighted_sum_feature_size = 0;
|
|
||||||
size_t weight_sum = 0;
|
|
||||||
|
|
||||||
for (auto& node : get_processing_order()) {
|
for (auto& node : get_processing_order()) {
|
||||||
auto &prim = *node;
|
auto &prim = *node;
|
||||||
if (prim.type() == cldnn::convolution::type_id()) {
|
if (prim.type() == cldnn::convolution::type_id()) {
|
||||||
@ -1324,35 +1321,4 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
|
|||||||
|
|
||||||
if (should_use_bs_fs_yx_bsv16_fsv16)
|
if (should_use_bs_fs_yx_bsv16_fsv16)
|
||||||
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1);
|
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 1);
|
||||||
|
|
||||||
|
|
||||||
// This is to avoid using fsv16 for shallow-feature networks.
|
|
||||||
// This may not be exactly the same as the real execution graph, as layer fusing is not done yet,
|
|
||||||
// but it is a reasonable approximation.
|
|
||||||
// Check the expected network efficiency after setting layer optimization attributes.
|
|
||||||
// If network depth is shallow, it is faster with fsv4.
|
|
||||||
for (auto& node : get_processing_order()) {
|
|
||||||
auto &prim = *node;
|
|
||||||
|
|
||||||
if (prim.is_in_data_flow() && prim.type() == cldnn::convolution::type_id()) {
|
|
||||||
size_t num_feature = prim.get_output_layout().size.feature.vector()[0];
|
|
||||||
size_t num_spatial = 1;
|
|
||||||
for (auto s : prim.get_output_layout().size.spatial.vector())
|
|
||||||
num_spatial *= s;
|
|
||||||
|
|
||||||
if (lo.get_preferred_format(prim) != format::b_fs_yx_fsv4) {
|
|
||||||
weight_sum += num_spatial;
|
|
||||||
weighted_sum_feature_size += num_spatial * num_feature;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t weighted_average_feature_depth = weighted_sum_feature_size / std::max(weight_sum, static_cast<size_t>(1));
|
|
||||||
|
|
||||||
// Need to confirm that weighted_average_feature_depth > 1 to keep unittest behavior.
|
|
||||||
if (is_quantized_int8_model && weighted_average_feature_depth < 8 && weighted_average_feature_depth > 1) {
|
|
||||||
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::fs_b_yx_fsv32_network, 0);
|
|
||||||
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::b_fs_yx_fsv16_network, 0);
|
|
||||||
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bs_fs_yx_bsv16_fsv16_network, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -34,6 +34,8 @@ namespace ngraph
|
|||||||
NGRAPH_API
|
NGRAPH_API
|
||||||
bool is_output(const ngraph::Node* node);
|
bool is_output(const ngraph::Node* node);
|
||||||
NGRAPH_API
|
NGRAPH_API
|
||||||
|
bool is_sink(const ngraph::Node* node);
|
||||||
|
NGRAPH_API
|
||||||
bool is_constant(const ngraph::Node* node);
|
bool is_constant(const ngraph::Node* node);
|
||||||
NGRAPH_API
|
NGRAPH_API
|
||||||
bool is_commutative(const ngraph::Node* node);
|
bool is_commutative(const ngraph::Node* node);
|
||||||
@ -60,6 +62,8 @@ namespace ngraph
|
|||||||
NGRAPH_API
|
NGRAPH_API
|
||||||
bool is_output(const std::shared_ptr<ngraph::Node>& node);
|
bool is_output(const std::shared_ptr<ngraph::Node>& node);
|
||||||
NGRAPH_API
|
NGRAPH_API
|
||||||
|
bool is_sink(const std::shared_ptr<ngraph::Node>& node);
|
||||||
|
NGRAPH_API
|
||||||
bool is_constant(const std::shared_ptr<ngraph::Node>& node);
|
bool is_constant(const std::shared_ptr<ngraph::Node>& node);
|
||||||
NGRAPH_API
|
NGRAPH_API
|
||||||
bool is_commutative(const std::shared_ptr<ngraph::Node>& node);
|
bool is_commutative(const std::shared_ptr<ngraph::Node>& node);
|
||||||
|
@ -76,6 +76,11 @@ bool ngraph::op::is_output(const ngraph::Node* node)
|
|||||||
return dynamic_cast<const ngraph::op::Result*>(node) != nullptr;
|
return dynamic_cast<const ngraph::op::Result*>(node) != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ngraph::op::is_sink(const ngraph::Node* node)
|
||||||
|
{
|
||||||
|
return dynamic_cast<const ngraph::op::Sink*>(node) != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
bool ngraph::op::is_constant(const ngraph::Node* node)
|
bool ngraph::op::is_constant(const ngraph::Node* node)
|
||||||
{
|
{
|
||||||
return dynamic_cast<const ngraph::op::Constant*>(node) != nullptr;
|
return dynamic_cast<const ngraph::op::Constant*>(node) != nullptr;
|
||||||
@ -134,6 +139,10 @@ bool ngraph::op::is_output(const std::shared_ptr<ngraph::Node>& node)
|
|||||||
{
|
{
|
||||||
return is_output(node.get());
|
return is_output(node.get());
|
||||||
}
|
}
|
||||||
|
bool ngraph::op::is_sink(const std::shared_ptr<ngraph::Node>& node)
|
||||||
|
{
|
||||||
|
return is_sink(node.get());
|
||||||
|
}
|
||||||
bool ngraph::op::is_constant(const std::shared_ptr<ngraph::Node>& node)
|
bool ngraph::op::is_constant(const std::shared_ptr<ngraph::Node>& node)
|
||||||
{
|
{
|
||||||
return is_constant(node.get());
|
return is_constant(node.get());
|
||||||
|
@ -7,7 +7,7 @@ skip_install=True
|
|||||||
deps =
|
deps =
|
||||||
-rrequirements.txt
|
-rrequirements.txt
|
||||||
-rrequirements_test.txt
|
-rrequirements_test.txt
|
||||||
mypy
|
mypy<0.900
|
||||||
flake8-bugbear
|
flake8-bugbear
|
||||||
pytest-xdist
|
pytest-xdist
|
||||||
setenv =
|
setenv =
|
||||||
|
@ -288,7 +288,7 @@ set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS
|
|||||||
if (ENABLE_MKL_DNN)
|
if (ENABLE_MKL_DNN)
|
||||||
message(STATUS "NGRAPH_TESTS: IE:CPU enabled")
|
message(STATUS "NGRAPH_TESTS: IE:CPU enabled")
|
||||||
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU")
|
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:CPU")
|
||||||
if (NOT ENABLE_STRICT_DEPENDENCIES)
|
if (ENABLE_STRICT_DEPENDENCIES)
|
||||||
# For convinience add a runtime dependency to build along with this target.
|
# For convinience add a runtime dependency to build along with this target.
|
||||||
# Warning: Parallel build with -GNinja may not be efficient.
|
# Warning: Parallel build with -GNinja may not be efficient.
|
||||||
list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin)
|
list(APPEND UNIT_TESTS_DEPENDENCIES MKLDNNPlugin)
|
||||||
@ -298,7 +298,7 @@ endif()
|
|||||||
if (ENABLE_CLDNN)
|
if (ENABLE_CLDNN)
|
||||||
message(STATUS "NGRAPH_TESTS: IE:GPU enabled")
|
message(STATUS "NGRAPH_TESTS: IE:GPU enabled")
|
||||||
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU")
|
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} "IE:GPU")
|
||||||
if (NOT ENABLE_STRICT_DEPENDENCIES)
|
if (ENABLE_STRICT_DEPENDENCIES)
|
||||||
# For convinience add a runtime dependency to build along with this target.
|
# For convinience add a runtime dependency to build along with this target.
|
||||||
# Warning: Parallel build with -GNinja may not be efficient.
|
# Warning: Parallel build with -GNinja may not be efficient.
|
||||||
list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin)
|
list(APPEND UNIT_TESTS_DEPENDENCIES clDNNPlugin)
|
||||||
|
Loading…
Reference in New Issue
Block a user