[CPU] [DEBUG CAPS] Extension for snippets and other ngraph transformations (#14223)
parent 40e19dec00
commit e306cbc67a
@@ -14,10 +14,11 @@
 #include "ie_parallel.hpp"
 #include "ie_system_conf.h"
-#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
+#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
 #include "openvino/core/type/element_type_traits.hpp"
 #include "openvino/runtime/properties.hpp"
-#include <cpu/x64/cpu_isa_traits.hpp>
+#include "utils/debug_capabilities.h"
+
+#include "cpu/x64/cpu_isa_traits.hpp"

 namespace ov {
 namespace intel_cpu {
@@ -48,10 +49,24 @@ Config::Config() {
     if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
         enforceBF16 = false;

-    CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
+    CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());

     updateProperties();
 }

+#ifdef CPU_DEBUG_CAPS
+/**
+ * Debug capabilities configuration has more priority than common one
+ * Some of the debug capabilities also require to enable some of common
+ * configuration properties
+ */
+void Config::applyDebugCapsProperties() {
+    // always enable perf counters for verbose mode and performance summary
+    if (!debugCaps.verbose.empty() || !debugCaps.summaryPerf.empty())
+        collectPerfCounters = true;
+}
+#endif
+
 void Config::readProperties(const std::map<std::string, std::string> &prop) {
     const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
     const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
@@ -184,7 +199,7 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
     if (exclusiveAsyncRequests)  // Exclusive request feature disables the streams
         streamExecutorConfig._streams = 1;

-    CPU_DEBUG_CAP_ENABLE(readDebugCapsProperties());
+    CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
     updateProperties();
 }
@@ -239,58 +254,6 @@ void Config::updateProperties() {
         _config.insert({PluginConfigParams::KEY_CACHE_DIR, cache_dir});
 }

-#ifdef CPU_DEBUG_CAPS
-void Config::readDebugCapsProperties() {
-    auto readEnv = [](const char* envVar) {
-        return std::getenv(envVar);
-    };
-
-    auto parseDumpFormat = [](const std::string& format) {
-        if (format == "BIN")
-            return FORMAT::BIN;
-        else if (format == "TEXT")
-            return FORMAT::TEXT;
-        else
-            IE_THROW() << "readDebugCapsProperties: Unknown dump format";
-    };
-
-    const char* envVarValue = nullptr;
-
-    if (envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH"))
-        execGraphPath = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_VERBOSE"))
-        verbose = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR"))
-        blobDumpDir = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT"))
-        blobDumpFormat = parseDumpFormat(envVarValue);
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID"))
-        blobDumpFilters[BY_EXEC_ID] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS"))
-        blobDumpFilters[BY_PORTS] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE"))
-        blobDumpFilters[BY_TYPE] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME"))
-        blobDumpFilters[BY_NAME] = envVarValue;
-
-    if (envVarValue = readEnv("OV_CPU_SUMMARY_PERF")) {
-        collectPerfCounters = true;
-        summaryPerf = envVarValue;
-    }
-
-    // always enable perf counters for verbose mode
-    if (!verbose.empty())
-        collectPerfCounters = true;
-}
-#endif // CPU_DEBUG_CAPS
-
 } // namespace intel_cpu
 } // namespace ov
@@ -6,8 +6,11 @@

 #include <threading/ie_istreams_executor.hpp>
 #include <ie_performance_hints.hpp>
-#include "utils/debug_capabilities.h"
+#include <ie/ie_common.h>
+#include <openvino/util/common_util.hpp>
+#include "utils/debug_caps_config.h"

+#include <bitset>
 #include <string>
 #include <map>
 #include <mutex>
@@ -57,31 +60,12 @@ struct Config {

     std::map<std::string, std::string> _config;

-#ifdef CPU_DEBUG_CAPS
-    enum FILTER {
-        BY_PORTS,
-        BY_EXEC_ID,
-        BY_TYPE,
-        BY_NAME,
-    };
-
-    enum class FORMAT {
-        BIN,
-        TEXT,
-    };
-
-    std::string execGraphPath;
-    std::string verbose;
-    std::string blobDumpDir = "cpu_dump";
-    FORMAT blobDumpFormat = FORMAT::TEXT;
-    // std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and defect in C++11 standart)
-    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
-    std::string summaryPerf = "";
-
-    void readDebugCapsProperties();
-#endif
-
     bool isNewApi = true;
+
+#ifdef CPU_DEBUG_CAPS
+    DebugCapsConfig debugCaps;
+    void applyDebugCapsProperties();
+#endif
 };

 } // namespace intel_cpu
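The new `DebugCapsConfig` type is declared in `utils/debug_caps_config.h`, which is not part of this excerpt. Based on the members the rest of the commit reads through `config.debugCaps` (verbose, summaryPerf, execGraphPath, and the blob-dump settings that previously lived directly in `Config`), a rough sketch of what it likely exposes is shown below; any name not referenced elsewhere in this diff is an assumption.

```cpp
// Hypothetical sketch of utils/debug_caps_config.h (the real header is not shown in this diff).
// The fields mirror the members that config.cpp, graph.cpp and graph_dumper.cpp access via
// config.debugCaps; the readProperties() entry point is an assumption.
#pragma once

#include <string>
#include <unordered_map>

namespace ov {
namespace intel_cpu {

struct DebugCapsConfig {
    enum FILTER { BY_PORTS, BY_EXEC_ID, BY_TYPE, BY_NAME };
    enum class FORMAT { BIN, TEXT };

    std::string execGraphPath;                // OV_CPU_EXEC_GRAPH_PATH
    std::string verbose;                      // OV_CPU_VERBOSE
    std::string blobDumpDir = "cpu_dump";     // OV_CPU_BLOB_DUMP_DIR
    FORMAT blobDumpFormat = FORMAT::TEXT;     // OV_CPU_BLOB_DUMP_FORMAT
    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
    std::string summaryPerf;                  // OV_CPU_SUMMARY_PERF

    void readProperties();                    // assumed: reads the OV_CPU_* environment variables
};

}  // namespace intel_cpu
}  // namespace ov
```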
@@ -6,6 +6,7 @@ Use the following cmake option to enable debug capabilities:
 * [Verbose mode](verbose.md)
 * [Blob dumping](blob_dumping.md)
 * [Graph serialization](graph_serialization.md)
+* [Graph transformation disabling](feature_disabling.md#graph-transformations)

 ## Debug log
@@ -29,8 +29,8 @@ Default is *cpu_dump*
 OV_CPU_BLOB_DUMP_FORMAT=<format> binary ...
 ```
 Options are:
-* BIN (default)
-* TEXT
+* BIN
+* TEXT (default)

 ## Filter input / output blobs
 To dump only input / output blobs:
new file: src/plugins/intel_cpu/src/docs/debug_caps_filters.md (52 lines)

# Filters

The filters described below share the following common format:
```sh
filter_name=<comma_separated_tokens>
```
Tokens are processed from left to right and each one includes or excludes the corresponding value.\
To exclude a value, the token is prefixed with a minus: *-token*\
All tokens are case insensitive, and an empty token list is treated as *all*,\
so the filters below are equivalent:
* filter_name
* filter_name=all
* filter_name=-all,ALL

## IR format filter

The IR format filter specifies the output IR formats, e.g. for [serialization](graph_serialization.md#graph-transformations).
```sh
formats=<comma_separated_tokens>
```

The following tokens are supported:
* all\
  equals to <xml,dot,svg>
* xml (default)\
  IR in a .xml file. Can be opened using, for example, the *netron* app. (For now the option is Linux only)
* xmlbin\
  IR in .xml and .bin files. Can be opened using, for example, the *netron* app.
* dot\
  IR in a .dot file (.svg.dot file if svg is also specified). Can be inspected using, for example, *graphviz* tools.
* svg\
  IR in an .svg file. Requires the *dot* tool to be installed on the host; not supported on Windows.\
  Generation is based on the dot representation, so the IR is additionally dumped to a .svg.dot file.

## Transformation filter

The transformation filter specifies the main graph transformation stages for different purposes,
e.g. for [disabling](feature_disabling.md#graph-transformations) or [serialization](graph_serialization.md#graph-transformations).
```sh
transformations=<comma_separated_tokens>
```

The following tokens are supported:
* all (default)\
  equals to <preLpt,lpt,postLpt,snippets,specific>
* common\
  equals to <preLpt,postLpt>
* preLpt
* lpt
* postLpt
* snippets
* specific
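The include/exclude token semantics above are simple to capture in a few lines. The snippet below is a minimal stand-alone sketch of that behaviour, not the plugin's actual parser (which lives in the debug-caps sources outside this diff): tokens are case insensitive, a leading minus excludes, and an empty filter means *all*.

```cpp
// Minimal sketch of the filter semantics described above (not the plugin's real parser).
#include <algorithm>
#include <cctype>
#include <iostream>
#include <set>
#include <sstream>
#include <string>

// Returns the resulting set of enabled tokens, given the universe of known tokens
// and a filter string such as "", "all", "-all,ALL" or "preLpt,-lpt".
std::set<std::string> applyFilter(const std::set<std::string>& known, const std::string& filter) {
    auto lower = [](std::string s) {
        std::transform(s.begin(), s.end(), s.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return s;
    };
    if (filter.empty())                       // no tokens is treated as "all"
        return known;
    std::set<std::string> enabled;
    std::stringstream ss(filter);
    std::string token;
    while (std::getline(ss, token, ',')) {    // tokens are processed from left to right
        const bool exclude = !token.empty() && token[0] == '-';
        const std::string name = lower(exclude ? token.substr(1) : token);
        if (name == "all") {
            if (exclude) enabled.clear(); else enabled = known;
        } else if (exclude) {
            enabled.erase(name);
        } else if (known.count(name)) {
            enabled.insert(name);
        }
    }
    return enabled;
}

int main() {
    const std::set<std::string> stages = {"prelpt", "lpt", "postlpt", "snippets", "specific"};
    // "-all,ALL" first clears everything, then re-enables everything, so it equals "all".
    for (const auto& s : applyFilter(stages, "-all,ALL"))
        std::cout << s << '\n';
}
```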
new file: src/plugins/intel_cpu/src/docs/feature_disabling.md (26 lines)

# Feature disabling

The common way to disable a feature of the CPU plugin is the environment variable **OV_CPU_DISABLE**:
```sh
OV_CPU_DISABLE=<space_separated_options> binary ...
```
Option names are case insensitive and processed from left to right,\
so the last one overwrites previous ones if duplicated.

Examples:
```sh
OV_CPU_DISABLE="transformations" binary ...
OV_CPU_DISABLE="transformations=lpt" binary ...
OV_CPU_DISABLE="transformations=all,-common" binary ...
```

**OV_CPU_DISABLE** controls disabling of the following features by means of the corresponding options:

## Graph transformations

Graph transformation disabling is controlled by the following option inside **OV_CPU_DISABLE**:
```sh
transformations=<comma_separated_tokens>
```
Filter with the main transformation stages to disable the specified ones.\
See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.
src/plugins/intel_cpu/src/docs/graph_serialization.md, @@ -1,17 +1,43 @@ (the file is rewritten; updated content below)

# Graph serialization

Graph serialization is disabled by default and controlled by environment variables.

## Execution graph

The execution graph can be serialized using the environment variable **OV_CPU_EXEC_GRAPH_PATH**:
```sh
OV_CPU_EXEC_GRAPH_PATH=<option> binary ...
```
Possible serialization options:
* cout\
  Serialize to console output.
* \<path\>.xml\
  Serialize the graph into .xml and .bin files. Can be opened using, for example, the *netron* app.
* **TBD**: \<path\>.dot\
  Serialize the graph into a .dot file. Can be inspected using, for example, *graphviz* tools.

## Graph transformations

Additionally, the IR can be serialized at specified stages using the environment variable **OV_CPU_DUMP_IR**:
```sh
OV_CPU_DUMP_IR=<space_separated_options> binary ...
```

Examples:
```sh
OV_CPU_DUMP_IR="transformations" binary ...
OV_CPU_DUMP_IR="transformations=snippets dir=path/dumpDir" binary ...
OV_CPU_DUMP_IR="transformations=all,-common DIR=path/dumpdir formats=svg,xml" binary ...
```

Option names are case insensitive; the following options are supported:
* dir=\<path\>\
  Path for the dumped IR files. If omitted, it defaults to *intel_cpu_dump*
* formats=<comma_separated_tokens>\
  Filter with IR formats to dump. If omitted, it defaults to *xml*\
  See [IR format filter](debug_caps_filters.md#ir-format-filter) for more details.
* transformations=<comma_separated_tokens>\
  Filter with the main transformation stages to serialize the graph before and after the specified ones.\
  See [transformation filter](debug_caps_filters.md#transformation-filter) for more details.

Options are processed from left to right, so the last one overwrites previous ones if duplicated.
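The **OV_CPU_DUMP_IR** value is a space-separated list of key=value options where later options win. A rough stand-alone illustration of that parsing, again not the actual implementation from the debug-caps sources, could look like this:

```cpp
// Rough sketch of parsing OV_CPU_DUMP_IR=<space_separated_options> (illustrative only).
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

struct DumpIrOptions {
    std::string dir = "intel_cpu_dump";   // default documented above
    std::string formats = "xml";          // default documented above
    std::string transformations;          // empty means nothing is dumped
};

DumpIrOptions parseDumpIr(const std::string& value) {
    DumpIrOptions opts;
    std::stringstream ss(value);
    std::string option;
    while (ss >> option) {                                   // options are space separated
        const auto eq = option.find('=');
        std::string key = option.substr(0, eq);
        const std::string val = (eq == std::string::npos) ? "" : option.substr(eq + 1);
        for (auto& c : key)                                  // option names are case insensitive
            c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        if (key == "dir")                     opts.dir = val;   // later options overwrite earlier ones
        else if (key == "formats")            opts.formats = val;
        else if (key == "transformations")    opts.transformations = val.empty() ? "all" : val;
    }
    return opts;
}

int main() {
    if (const char* env = std::getenv("OV_CPU_DUMP_IR")) {
        const DumpIrOptions opts = parseDumpIr(env);
        std::cout << "dir=" << opts.dir << " formats=" << opts.formats
                  << " transformations=" << opts.transformations << '\n';
    }
}
```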
@@ -1073,7 +1073,7 @@ void Graph::InferStatic(InferRequestBase* request) {
     dnnl::stream stream(eng);

     for (const auto& node : executableGraphNodes) {
-        VERBOSE(node, config.verbose);
+        VERBOSE(node, config.debugCaps.verbose);
         PERF(node, config.collectPerfCounters);

         if (request)
@@ -1160,7 +1160,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
         updateNodes(stopIndx);
         for (; inferCounter < stopIndx; ++inferCounter) {
             auto& node = executableGraphNodes[inferCounter];
-            VERBOSE(node, config.verbose);
+            VERBOSE(node, config.debugCaps.verbose);
             PERF(node, config.collectPerfCounters);

             if (request)
@@ -1171,7 +1171,7 @@ void Graph::InferDynamic(InferRequestBase* request) {
 }

 inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) const {
-    DUMP(node, config, infer_count);
+    DUMP(node, config.debugCaps, infer_count);
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute);

     if (node->isDynamicNode()) {
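The VERBOSE/PERF/DUMP macros above, like CPU_DEBUG_CAP_ENABLE in config.cpp, are meant to compile away when debug capabilities are off. Their definitions are outside this diff; a typical shape for such a guard, shown here only as an assumption-laden sketch, is:

```cpp
// Illustrative sketch of a debug-caps guard macro; the real definitions live in
// utils/debug_capabilities.h and the dump/verbose headers and may differ.
#ifdef CPU_DEBUG_CAPS
#    define CPU_DEBUG_CAP_ENABLE(...) __VA_ARGS__   // keep the statement in debug-caps builds
#else
#    define CPU_DEBUG_CAP_ENABLE(...)               // compile it out otherwise
#endif

// Usage, as in Config::Config() and Config::readProperties():
//     CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
```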
@@ -210,7 +210,7 @@ std::shared_ptr<ngraph::Function> dump_graph_as_ie_ngraph_net(const Graph &graph

 #ifdef CPU_DEBUG_CAPS
 void serialize(const Graph &graph) {
-    const std::string& path = graph.getConfig().execGraphPath;
+    const std::string& path = graph.getConfig().debugCaps.execGraphPath;

     if (path.empty())
         return;
@@ -257,7 +257,7 @@ void serializeToCout(const Graph &graph) {
 }

 void summary_perf(const Graph &graph) {
-    const std::string& summaryPerf = graph.getConfig().summaryPerf;
+    const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf;

     if (summaryPerf.empty())
         return;
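graph_serialization.md above documents that **OV_CPU_EXEC_GRAPH_PATH** accepts either cout or a path ending in .xml, while serialize() only shows the early exit on an empty path here. A dispatch consistent with that documentation might look like the sketch below; serializeToCout() appears in the surrounding context of this file, whereas serializeToXML() is a hypothetical name used only for illustration.

```cpp
// Sketch of how serialize() can dispatch on the OV_CPU_EXEC_GRAPH_PATH value documented in
// graph_serialization.md. serializeToCout() exists in this file (see the hunk above);
// serializeToXML() is a hypothetical helper used here only for illustration.
void serialize(const Graph& graph) {
    const std::string& path = graph.getConfig().debugCaps.execGraphPath;

    if (path.empty())
        return;                       // serialization disabled (default)

    if (path == "cout")
        serializeToCout(graph);       // dump the execution graph to the console
    else if (path.size() > 4 && path.substr(path.size() - 4) == ".xml")
        serializeToXML(graph, path);  // hypothetical: write <path>.xml / <path>.bin
    else
        IE_THROW() << "Unknown OV_CPU_EXEC_GRAPH_PATH option: " << path;
}
```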
@@ -27,6 +27,7 @@ namespace intel_cpu {

 inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphFunc) {
     RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset);

     ngraph::pass::Manager manager;
     manager.register_pass<ConvertMatMulToFC>();
     manager.register_pass<AlignMatMulInputRanks>();
@@ -2,140 +2,28 @@
 // SPDX-License-Identifier: Apache-2.0
 //

Remaining includes after the change:

#include "ie_metric_helpers.hpp" // must be included first

#include "plugin.h"

#include "transformation_pipeline.h"
#include "itt.h"
#include "extension_mngr.h"
#include "extension.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"

#include "ie_icore.hpp"
#include "ie_plugin_config.hpp"
#include "ie_system_conf.h"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

#include <ie_ngraph_utils.hpp>

#include "performance_heuristics.hpp"
#include "weights_cache.hpp"
#include "utils/denormals.hpp"

#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
#ifndef __GNUC_PREREQ

Removed includes:

#include <threading/ie_executor_manager.hpp>
#include <memory>
#include <ie_plugin_config.hpp>
#include <cpp_interfaces/interface/ie_internal_plugin_config.hpp>
#include <ie_icore.hpp>
#include <fstream>
#include <vector>
#include <tuple>
#include <unordered_set>
#include <ie_system_conf.h>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
#include <transformations/common_optimizations/mul_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
#include <transformations/common_optimizations/convert_quantize_dequantize.hpp>
#include <transformations/common_optimizations/nop_elimination.hpp>
#include <transformations/common_optimizations/wrap_interpolate_into_transposes.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/op_conversions/convert_broadcast_to_tiles.hpp>
#include <transformations/op_conversions/convert_depth_to_space.hpp>
#include <transformations/op_conversions/convert_shuffle_channels3.hpp>
#include <transformations/op_conversions/convert_slice_to_strided_slice.hpp>
#include <transformations/op_conversions/convert_space_to_depth.hpp>
#include <transformations/op_conversions/convert_gelu.hpp>
#include <transformations/op_conversions/convert_gather_downgrade.hpp>
#include <transformations/op_conversions/convert_gather_upgrade.hpp>
#include <transformations/op_conversions/detection_output_downgrade.hpp>
#include <transformations/op_conversions/detection_output_upgrade.hpp>
#include <transformations/op_conversions/gelu7_downgrade.hpp>
#include <transformations/op_conversions/hswish_decomposition.hpp>
#include <transformations/op_conversions/hsigmoid_decomposition.hpp>
#include <transformations/op_conversions/mvn6_decomposition.hpp>
#include <transformations/op_conversions/normalize_l2_decomposition.hpp>
#include <transformations/op_conversions/reduce_l1_decomposition.hpp>
#include <transformations/op_conversions/reduce_l2_decomposition.hpp>
#include <transformations/op_conversions/softplus_decomposition.hpp>
#include <transformations/op_conversions/convert_space_to_batch.hpp>
#include <transformations/op_conversions/convert_batch_to_space.hpp>
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_subtract.hpp>
#include <transformations/op_conversions/softmax_decomposition.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/op_conversions/convert_mod.hpp>
#include <transformations/op_conversions/convert_ti_to_sequences.hpp>
#include <transformations/op_conversions/lstm_cell_decomposition.hpp>
#include <transformations/op_conversions/rnn_cell_decomposition.hpp>
#include <transformations/op_conversions/gru_cell_decomposition.hpp>
#include <transformations/op_conversions/log_softmax_decomposition.hpp>
#include <transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp>
#include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
#include <transformations/op_conversions/convert_previous_nms_to_nms_9.hpp>
#include <transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp>
#include <transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp>
#include <transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp>
#include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
#include <transformations/smart_reshape/matmul_sr.hpp>
#include <transformations/op_conversions/convert_minimum_to_power_and_max.hpp>
#include <transformations/op_conversions/convert_reduce_to_pooling.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <transformations/op_conversions/fq_decomposition.hpp>
#include <transformations/utils/utils.hpp>
#include <transformations/op_conversions/convert_roi_align_v9_to_v3.hpp>
#include <transformations/op_conversions/convert_roi_align_v3_to_v9.hpp>
#include <transformations/op_conversions/softsign_decomposition.hpp>
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/unique_decomposition.hpp"
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "ngraph_transformations/mha_fusion.hpp"
#include "ngraph_transformations/convert_to_interaction.hpp"
#include "ngraph_transformations/convert_fq_rnn_to_quantized_rnn.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "ngraph_transformations/swap_convert_transpose.hpp"
#include <snippets/pass/collapse_subgraph.hpp>
#include <snippets/pass/common_optimizations.hpp>
#include <snippets/pass/convert_constants.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset2.hpp>
#include <ngraph/opsets/opset3.hpp>
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <openvino/opsets/opset10.hpp>
#include <ngraph/op/util/op_types.hpp>
#include <ngraph/pass/manager.hpp>
#include <ngraph/graph_util.hpp>
#include <ov_ops/augru_cell.hpp>
#include <ov_ops/augru_sequence.hpp>
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
#include <low_precision/layer_transformation.hpp>
#include <low_precision/low_precision.hpp>
#include <low_precision/multiply_to_group_convolution.hpp>
#include <low_precision/network_helper.hpp>
#include "openvino/runtime/core.hpp"
#include "openvino/util/common_util.hpp"
#include <ie_algorithm.hpp>
#include "nodes/mvn.h"
#include "nodes/fake_quantize.h"
#include "nodes/normalize.h"
#include "nodes/mha.h"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"
@@ -262,452 +150,6 @@ Engine::~Engine() {
    executorManager()->clear("CPUCallbackExecutor");
}

Removed from this file (the transformation pipeline moves into the new transformation_pipeline.cpp):

static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx) {
    if (auto convert = ov::as_type_ptr<ov::opset10::Convert>(node)) {
        // For Convert node, converting precision from floating point to boolean will lead to mathematical
        // error, because here the output precision boolean is replaced by u8. E.g. floating point value 0.01
        // is converted to be 1 for boolean, but 0 for u8. Thus an Abs and Ceil node should be added before the
        // Convert node for this scenario.
        if (convert->input(0).get_element_type().is_real() &&
            convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) {
            auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
            auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
            auto new_convert = std::make_shared<ov::opset10::Convert>(ceil, to);
            new_convert->set_friendly_name(convert->get_friendly_name());
            ov::copy_runtime_info(convert, {abs, ceil, new_convert});
            ov::replace_node(convert, new_convert);
            return true;
        } else {
            convert->set_convert_element_type(to);
            return true;
        }
    }
    return false;
}

static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc, const bool _enableLPT, const bool _enableBF16,
                                               const bool _enableSnippets, const bool isLegacyApi) {
    ov::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ov::pass::InitNodeInfo>();

    const bool useLpt =
        _enableLPT &&
        ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc);
    auto defaultPrecisions = useLpt ? ngraph::pass::low_precision::precision_set::int8_support : std::vector<ov::element::Type>{};
    bool hasINT16orINT32Levels = false;
    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
        hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
            nGraphFunc,
            {ngraph::pass::low_precision::levels::int16, ngraph::pass::low_precision::levels::int16_narrow_range,
             ngraph::pass::low_precision::levels::int32, ngraph::pass::low_precision::levels::int32_narrow_range});
        if (hasINT16orINT32Levels) {
            defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_int16_int32_support;
        }
        manager.register_pass<ov::pass::MarkDequantizationSubgraph>(defaultPrecisions);
    }
    auto get_convert_precisions = []() {
        precisions_array array = {
            {ngraph::element::i64, ngraph::element::i32},
            {ngraph::element::u64, ngraph::element::i32},
            {ngraph::element::i16, ngraph::element::i32},
            {ngraph::element::u16, ngraph::element::i32},
            {ngraph::element::u32, ngraph::element::i32},
            {ngraph::element::f64, ngraph::element::f32},
            {ngraph::element::f16, ngraph::element::f32},
            {ngraph::element::boolean, ngraph::element::u8},
            {ngraph::element::i4, ngraph::element::i8},
            {ngraph::element::u4, ngraph::element::u8}
        };

        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
            array.push_back({ngraph::element::bf16, ngraph::element::f32});

        return array;
    };

    static const auto precisions = get_convert_precisions();
    type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

    manager.register_pass<ov::pass::AUGRUCellFusion>();
    manager.register_pass<ov::pass::CommonOptimizations>();
    manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
    manager.register_pass<ov::pass::TransposeSinking>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::RNNCellDecomposition>();
    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
    manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
    manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
    manager.register_pass<ov::pass::TransposeMatMul>();
    manager.register_pass<ov::pass::ConstantFolding>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
        manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
    }
    manager.register_pass<ov::pass::Validate>();
    manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
    manager.register_pass<ov::pass::EliminateConvert>();
    manager.register_pass<SwapConvertTranspose>();
    manager.register_pass<ConvertToInteraction>();
    manager.register_pass<ConvertInteractionInt8>();

    auto pass_config = manager.get_pass_config();

    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;

    // SpaceToDepth/ DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
    pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
                              ov::pass::ConvertDepthToSpace>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_shape().size() <= 5lu &&
                   node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
        });

    pass_config->set_callback<ov::pass::ConvertBatchToSpace,
                              ov::pass::ConvertSpaceToBatch>(
        [](const_node_ptr &node) -> bool {
            const auto & rank = node->input(0).get_partial_shape().rank().get_length();
            return rank == 4lu || rank == 5lu;
        });

    auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
        if (const auto &rnn_cell = std::dynamic_pointer_cast<const ngraph::opset4::RNNCell>(node)) {
            return rnn_cell->get_clip() == 0.0f;
        } else if (const auto &gru_cell = std::dynamic_pointer_cast<const ngraph::opset4::GRUCell>(node)) {
            return gru_cell->get_clip() == 0.0f
                && gru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &augru_cell = std::dynamic_pointer_cast<const ov::op::internal::AUGRUCell>(node)) {
            return augru_cell->get_clip() == 0.0f
                && augru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ngraph::opset4::LSTMCell>(node)) {
            return lstm_cell->get_clip() == 0.0f &&
                   lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ngraph::opset1::LSTMCell>(node)) {
            return lstm_cell_v1->get_clip() == 0.0f &&
                   lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        }
        return false;
    };

    // Sequences supported by the plugin shouldn't be converted to TensorIterator.
    // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we
    // should always convert to TensorIterator.
    // RNN/GRU/LSTM Sequences are supported with clip == 0, and with default activations.
    auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool {
        const auto& data = node->input(0);
        const auto& data_pshape = data.get_partial_shape();
        // WA: dynamic shapes make impossible to check seq_len due to shapeOf subgraphs
        // but the sequence is still supported in CPU and doesn't need to be decomposed
        if (data_pshape.is_dynamic())
            return true;
        if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static())
            return false;
        auto max_seq_len = data.get_shape().at(1);
        if (const auto &rnn_seq = std::dynamic_pointer_cast<const ngraph::opset6::RNNSequence>(node)) {
            return rnn_seq->get_clip() == 0.0f &&
                   !ngraph::op::util::is_seq_len_provided(rnn_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &gru_seq = std::dynamic_pointer_cast<const ngraph::opset6::GRUSequence>(node)) {
            return gru_seq->get_clip() == 0.0f &&
                   gru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ngraph::op::util::is_seq_len_provided(gru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &augru_seq = std::dynamic_pointer_cast<const ov::op::internal::AUGRUSequence>(node)) {
            return augru_seq->get_clip() == 0.0f &&
                   augru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ngraph::op::util::is_seq_len_provided(augru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &lstm_seq = std::dynamic_pointer_cast<const ngraph::opset6::LSTMSequence>(node)) {
            return lstm_seq->get_clip() == 0.0f &&
                   lstm_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"} &&
                   !ngraph::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), max_seq_len);
        }
        return false;
    };

    pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
                              ov::pass::ConvertGRUSequenceToTensorIterator,
                              ov::pass::ConvertLSTMSequenceToTensorIterator>(
        [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
            return isSequencePrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
                              ov::pass::LSTMCellDecomposition>(
        [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
            return isCellPrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::MVN6Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMessage;
            return node::MVN::isSupportedOperation(node, errorMessage);
        });

    pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMsg;
            return node::NormalizeL2::isSupportedOperation(node, errorMsg);
        });

    pass_config->enable<ov::pass::SoftmaxDecomposition>();
    pass_config->set_callback<ov::pass::SoftmaxDecomposition>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
        });

    if (!isLegacyApi) {
        auto nmsCallback = [](const_node_ptr &node) -> bool {
            for (size_t i = 0; i < node->get_output_size(); i++) {
                const auto outputs = node->get_output_target_inputs(i);
                for (const auto &out : outputs) {
                    if (!ngraph::op::is_output(out.get_node())) {
                        return false;
                    }
                }
            }
            return true;
        };

        pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
    }

    // List of enabled/disabled transformations

    // Allow FP16 Converts to be folded and FP16 constants to be upgraded to FP32 data type
    pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
    pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
    pass_config->disable<ov::pass::EyeDecomposition>();

    pass_config->disable<ov::pass::ConvertGELU>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::Gelu7Downgrade>();
    pass_config->disable<ov::pass::HSwishDecomposition>();
    pass_config->disable<ov::pass::ReduceL1Decomposition>();
    pass_config->disable<ov::pass::ReduceL2Decomposition>();
    pass_config->disable<ov::pass::SoftPlusDecomposition>();
    pass_config->disable<ov::pass::HSigmoidDecomposition>();
    pass_config->disable<ov::pass::ConvertMod>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
    pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
    pass_config->disable<ov::pass::ConvertGather7ToGather1>();
    pass_config->disable<ov::pass::ConvertGather8ToGather7>();
    pass_config->disable<ov::pass::ConvertMinimum>();
    pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
    pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
    pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
    pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
    pass_config->disable<ov::pass::SliceToStridedSlice>();
    pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    pass_config->disable<ov::pass::ConvertROIAlign9To3>();
    pass_config->disable<ov::pass::SoftSignDecomposition>();
    pass_config->disable<ov::pass::UniqueDecomposition>();

    pass_config->enable<ov::pass::NormalizeL2Decomposition>();
    pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
    pass_config->enable<ov::pass::ConvertGather1ToGather7>();
    pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
    pass_config->enable<ov::pass::ConvertROIAlign3To9>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
        pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
                                  ov::pass::MulFakeQuantizeFusion,
                                  ov::pass::FakeQuantizeMulFusion>([](const_node_ptr &node) -> bool {
            std::string errMsg;
            return !node::FakeQuantize::isSupportedOperation(node, errMsg);
        });

        pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
            return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
        });
    }

    manager.run_passes(nGraphFunc);

    using namespace ngraph::pass::low_precision;
    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
        OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
        // Only enable conv/group conv signed input on AMX platform.
        std::vector<ngraph::element::Type> input0LowPrecisionList;
        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
            input0LowPrecisionList = {ngraph::element::u8, ngraph::element::i8};
        } else {
            input0LowPrecisionList = {ngraph::element::u8};
        }
        auto supportedPrecisions = std::vector<PrecisionsRestriction>({
            PrecisionsRestriction::create<ngraph::opset1::Convolution>({
                {{0}, input0LowPrecisionList},
                {{1}, {ngraph::element::i8}},
            }),
            PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
                {{0}, {ngraph::element::u8, ngraph::element::i8}},
                {{1}, {ngraph::element::i8}}
            }),
            PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
                {{0}, input0LowPrecisionList},
                {{1}, {ngraph::element::i8}}
            }),
            PrecisionsRestriction::create<ngraph::opset1::Multiply>({
                {{0}, {ngraph::element::u8}},
                {{1}, {ngraph::element::i8}},
            }),
            PrecisionsRestriction::create<ngraph::opset1::MatMul>({
                {{0}, {ngraph::element::u8, ngraph::element::i8}},
                {{1}, {ngraph::element::i8}}
            }),
            PrecisionsRestriction::create<ngraph::opset5::LSTMSequence>({
                {{0, 1}, {ngraph::element::u8, ngraph::element::i8}},
            }),
            PrecisionsRestriction::create<ngraph::opset6::GRUSequence>({
                {{0, 1}, {ngraph::element::u8, ngraph::element::i8}},
            }),
        });

        auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
            QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
            QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
        });

        // for GNA networks reference execution
        bool updatePrecision = true;
        if (hasINT16orINT32Levels) {
            updatePrecision = false;
            supportedPrecisions = std::vector<PrecisionsRestriction>({});
        }

        ov::pass::Manager lptManager;
        lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
            supportedPrecisions,
            quantizationRestrictions,
            LayerTransformation::Params(updatePrecision, ngraph::element::f32, defaultPrecisions));
        lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
            if (const auto mulitply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
                return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply);
            }
            return false;
        });
        lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>(
            [&defaultPrecisions](const_node_ptr& node) -> bool {
                return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
                       WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
            });
        lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>([](const_node_ptr& node) -> bool {
            return true;  // MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
        });
        lptManager.run_passes(nGraphFunc);
    }

    ov::pass::Manager postLPTPassManager;
    postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
    postLPTPassManager.register_pass<ov::pass::ReshapePRelu>();
    postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
        // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
        return node->get_rt_info().count("UNROLL_TI") == 0;
    });
    postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
    postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
        if (node->get_input_size() >= 2) {
            return node->get_input_element_type(1) == ngraph::element::i8 || node->get_input_element_type(1) == ngraph::element::u8;
        }
        return false;
    });

    postLPTPassManager.register_pass<ov::pass::ConstantFolding>();

    // Snippets may brake MHA patterns so the fusion has to performed before
    postLPTPassManager.register_pass<MHAFusion>();
    postLPTPassManager.register_pass<FuseFQtoInteraction>();
    postLPTPassManager.get_pass_config()->set_callback<MHAFloatFusion, MHAFloatFusion2,
                                                       MHAQuantFusion, MHAQuantFusion2>([_enableBF16](const std::shared_ptr<const ov::Node>& n) -> bool {
        std::string errorMessage;

        if (!node::MHA::isSupportedOperation(n, errorMessage))
            return true;

        // Implementation calls AMX BF16 brgemm only for tensors with K and N aligned on 2, otherwise fallbacks on vector impl
        // Vector madd BF16 instruction on SPR has reduced performance on HW level, which results in overall perf degradation
        size_t bf16Factor = 2;
        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16_amx_bf16) &&
            (n->get_input_element_type(0) == element::bf16 || (n->get_input_element_type(0) == element::f32 && _enableBF16)) &&
            (n->get_input_shape(0)[3] % bf16Factor != 0 || n->get_input_shape(1)[1] % bf16Factor != 0 || n->get_input_shape(3)[3] % bf16Factor != 0)) {
            return true;
        }

        return false;
    });

    // Execute before snippets. Otherwise FQ will be converted to Subgraph
    postLPTPassManager.register_pass<ConvertFqRnnToQuantizedRnn>();
    postLPTPassManager.run_passes(nGraphFunc);

    if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
        ov::pass::Manager snippetsManager;
        snippetsManager.register_pass<SnippetsMarkSkipped>();
        snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
        snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
        snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
            [](const std::shared_ptr<const ov::Node>& n) -> bool {
                // CPU Plugin support Swish in Subgraph via conversion to SwichCPU which assumes second input to be constant
                if (ov::is_type<const ov::op::v4::Swish>(n)) {
                    if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
                        return true;
                }

                const auto& inputs = n->inputs();
                // todo: clarify whether we can evaluate snippets on const paths
                const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
                    [](const ov::Input<const ov::Node> &in) {
                        return ov::is_type<ov::op::v0::Constant>(in.get_source_output().get_node_shared_ptr());
                    });
                // todo: clarify whether we can evaluate snippets on inputs with larger ranks
                auto rank_is_too_large = [](const ov::descriptor::Tensor& t) {
                    // callback is called has_supported_in_out(), so it's safe to assume that the shapes are static
                    return t.get_partial_shape().rank().get_length() > 6;
                };
                const bool bad_input_rank = std::any_of(inputs.begin(), inputs.end(),
                    [&](const ov::Input<const ov::Node>& in) { return rank_is_too_large(in.get_tensor()); });
                const auto& outputs = n->outputs();
                const bool bad_output_rank = std::any_of(outputs.begin(), outputs.end(),
                    [&](const ov::Output<const ov::Node>& out) { return rank_is_too_large(out.get_tensor()); });
                return has_only_const_inputs || bad_input_rank || bad_output_rank;
            });
        snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
        snippetsManager.run_passes(nGraphFunc);
    }

    ov::pass::Manager postSnippetsManager;
    postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
    postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
        std::string errMsg;
        return node::FakeQuantize::isSupportedOperation(node, errMsg);
    });
    postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
    postSnippetsManager.run_passes(nGraphFunc);
}

Unchanged context following the removed block:

static bool streamsSet(const std::map<std::string, std::string>& config) {
    return config.count(PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS) ||
           config.count(ov::num_streams.name());
@@ -883,7 +325,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
             || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled for the plugin */;
     const auto& BF16Prop = config.find(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16);
-    bool enableBF16;
+    bool enableBF16 = false;
     if (BF16Prop != config.end()) {
         if (BF16Prop->second == PluginConfigParams::YES) {
             enableBF16 = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core);
@@ -901,7 +343,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std

     DEBUG_LOG(PrintableModel(*nGraphFunc, "org_"));

-    TransformationUpToCPUSpecificOpSet(nGraphFunc, enableLPT, enableBF16, enableSnippets, isLegacyAPI());
+    Transformations transformations(nGraphFunc, enableLPT, enableSnippets, enableBF16, isLegacyAPI(), engConfig);
+    transformations.UpToCpuSpecificOpSet();

     // need to check that all outputs have static shapes
     // checking that all inputs have static shapes is performed in the common part
@@ -914,8 +357,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     }

     ApplyPerformanceHints(config, nGraphFunc);
-
-    ConvertToCPUSpecificOpset(nGraphFunc);
+    transformations.CpuSpecificOpSet();

     DEBUG_LOG(PrintableModel(*nGraphFunc, "cpu_"));
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto supported = GetSupportedNodes(model,
|
auto supported = GetSupportedNodes(model,
|
||||||
[&](std::shared_ptr<ov::Model>& model) {
|
[&](std::shared_ptr<ov::Model>& model) {
|
||||||
TransformationUpToCPUSpecificOpSet(model, enableLPT, conf.enforceBF16, enableSnippets, isLegacyAPI());
|
Transformations transformation(model, enableLPT, enableSnippets, conf.enforceBF16, isLegacyAPI(), engConfig);
|
||||||
ConvertToCPUSpecificOpset(model);
|
transformation.UpToCpuSpecificOpSet();
|
||||||
},
|
transformation.CpuSpecificOpSet();
|
||||||
[&](const std::shared_ptr<ngraph::Node>& op) {
|
},
|
||||||
std::unique_ptr<Node> ptr;
|
[&](const std::shared_ptr<ngraph::Node>& op) {
|
||||||
try {
|
std::unique_ptr<Node> ptr;
|
||||||
ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
|
try {
|
||||||
} catch (const InferenceEngine::Exception&) {
|
ptr.reset(Node::factory().create(op, {dnnl::engine::kind::cpu, 0}, extensionManager, fake_w_cache));
|
||||||
return false;
|
} catch (const InferenceEngine::Exception&) {
|
||||||
}
|
return false;
|
||||||
return true;
|
}
|
||||||
});
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
for (auto&& layerName : supported) {
|
for (auto&& layerName : supported) {
|
||||||
res.supportedLayersMap.emplace(layerName, GetName());
|
res.supportedLayersMap.emplace(layerName, GetName());
|
||||||
|
@ -4,16 +4,12 @@

#pragma once

#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
#include "exec_network.h"

#include <string>
#include <map>
#include <unordered_map>
#include <memory>
#include <functional>
#include <vector>
#include <cfloat>

namespace ov {
namespace intel_cpu {
src/plugins/intel_cpu/src/transformation_pipeline.cpp (new file, 609 lines)
@ -0,0 +1,609 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "transformation_pipeline.h"

// Operations
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset2.hpp"
#include "openvino/opsets/opset3.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset5.hpp"
#include "openvino/opsets/opset6.hpp"
#include "openvino/opsets/opset10.hpp"
#include <ov_ops/augru_cell.hpp>
#include <ov_ops/augru_sequence.hpp>

// Common transformations
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/nop_elimination.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
#include "transformations/common_optimizations/augru_cell_fusion.hpp"
#include "transformations/common_optimizations/common_optimizations.hpp"
#include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/op_conversions/convert_batch_to_space.hpp"
#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp"
#include "transformations/op_conversions/convert_depth_to_space.hpp"
#include "transformations/op_conversions/convert_gather_downgrade.hpp"
#include "transformations/op_conversions/convert_gather_upgrade.hpp"
#include "transformations/op_conversions/convert_gelu.hpp"
#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp"
#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp"
#include "transformations/op_conversions/convert_mod.hpp"
#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp"
#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"
#include "transformations/op_conversions/convert_roi_align_v3_to_v9.hpp"
#include "transformations/op_conversions/convert_roi_align_v9_to_v3.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/convert_shuffle_channels3.hpp"
#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
#include "transformations/op_conversions/convert_space_to_batch.hpp"
#include "transformations/op_conversions/convert_space_to_depth.hpp"
#include "transformations/op_conversions/convert_subtract.hpp"
#include "transformations/op_conversions/convert_ti_to_sequences.hpp"
#include "transformations/op_conversions/detection_output_downgrade.hpp"
#include "transformations/op_conversions/detection_output_upgrade.hpp"
#include "transformations/op_conversions/eye_decomposition.hpp"
#include "transformations/op_conversions/fq_decomposition.hpp"
#include "transformations/op_conversions/gelu7_downgrade.hpp"
#include "transformations/op_conversions/hsigmoid_decomposition.hpp"
#include "transformations/op_conversions/hswish_decomposition.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/mvn6_decomposition.hpp"
#include "transformations/op_conversions/normalize_l2_decomposition.hpp"
#include "transformations/op_conversions/reduce_l1_decomposition.hpp"
#include "transformations/op_conversions/reduce_l2_decomposition.hpp"
#include "transformations/op_conversions/rnn_cell_decomposition.hpp"
#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp"
#include "transformations/op_conversions/softplus_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include "transformations/op_conversions/unique_decomposition.hpp"
#include "transformations/opset_conversions/convert_opset2_to_opset1.hpp"
#include "transformations/opset_conversions/convert_opset3_to_opset2.hpp"
#include "transformations/smart_reshape/matmul_sr.hpp"
#include "transformations/init_node_info.hpp"
#include "utils/ngraph_transformation.hpp"

// LPT transformations
#include "transformations/low_precision/mark_dequantization_subgraph.hpp"
#include "low_precision/convolution_backprop_data.hpp"
#include "low_precision/convert_subtract_constant.hpp"
#include "low_precision/network_helper.hpp"
#include "low_precision/multiply_to_group_convolution.hpp"
#include "low_precision/group_convolution.hpp"

// CPU specific transformations
#include "ngraph_transformations/convert_to_cpu_specific_opset.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "ngraph_transformations/mha_fusion.hpp"
#include "ngraph_transformations/convert_to_interaction.hpp"
#include "ngraph_transformations/convert_fq_rnn_to_quantized_rnn.hpp"
#include "ngraph_transformations/move_eltwise_up_data_movement.hpp"
#include "ngraph_transformations/swap_convert_transpose.hpp"

// Snippets
#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/pass/common_optimizations.hpp"

// Misc
#include "nodes/mvn.h"
#include "nodes/normalize.h"
#include "nodes/fake_quantize.h"
#include "nodes/mha.h"

#include "dnnl.hpp"
#include <cpu/x64/cpu_isa_traits.hpp>

namespace ov {
namespace intel_cpu {

using const_node_ptr = const std::shared_ptr<const ov::Node>;

bool Transformations::fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx) {
    if (auto convert = ov::as_type_ptr<ov::opset10::Convert>(node)) {
        // For the Convert node, converting precision from floating point to boolean will lead to a mathematical
        // error, because here the output precision boolean is replaced by u8. E.g. the floating point value 0.01
        // is converted to 1 for boolean, but to 0 for u8. Thus an Abs and a Ceiling node should be added before the
        // Convert node for this scenario.
        if (convert->input(0).get_element_type().is_real() &&
            convert->get_convert_element_type() == ngraph::element::boolean && to.is_integral_number()) {
            auto abs = std::make_shared<ov::opset10::Abs>(convert->input_value(0).get_node_shared_ptr());
            auto ceil = std::make_shared<ov::opset10::Ceiling>(abs);
            auto new_convert = std::make_shared<ov::opset10::Convert>(ceil, to);
            new_convert->set_friendly_name(convert->get_friendly_name());
            ov::copy_runtime_info(convert, {abs, ceil, new_convert});
            ov::replace_node(convert, new_convert);
            return true;
        } else {
            convert->set_convert_element_type(to);
            return true;
        }
    }
    return false;
}

void Transformations::UpToCpuSpecificOpSet() {
    const bool useLpt = enableLpt &&
        ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(model) &&
        CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Lpt);

    const bool useSnippets = enableSnippets &&
        CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(config.debugCaps, Snippets);

    auto defaultPrecisions = useLpt ? ngraph::pass::low_precision::precision_set::int8_support : std::vector<ov::element::Type>{};
    bool hasINT16orINT32Levels = false;

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part1);
        hasINT16orINT32Levels = ngraph::pass::low_precision::LowPrecision::isFQLevelsPresent(
            model,
            {ngraph::pass::low_precision::levels::int16, ngraph::pass::low_precision::levels::int16_narrow_range,
             ngraph::pass::low_precision::levels::int32, ngraph::pass::low_precision::levels::int32_narrow_range});
        if (hasINT16orINT32Levels) {
            defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_int16_int32_support;
        }
    }

    PreLpt(defaultPrecisions, isLegacyApi);

    if (useLpt)
        Lpt(hasINT16orINT32Levels, defaultPrecisions);

    PostLpt();

    if (useSnippets)
        Snippets();
}

void Transformations::CpuSpecificOpSet(void) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Specific);

    ConvertToCPUSpecificOpset(model);
}

void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PreLpt);

    ov::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ov::pass::InitNodeInfo>();

    const bool useLpt = !defaultPrecisions.empty();
    if (useLpt) {
        manager.register_pass<ov::pass::MarkDequantizationSubgraph>(defaultPrecisions);
    }

    auto get_convert_precisions = []() {
        precisions_array array = {
            {ov::element::i64,     ov::element::i32},
            {ov::element::u64,     ov::element::i32},
            {ov::element::i16,     ov::element::i32},
            {ov::element::u16,     ov::element::i32},
            {ov::element::u32,     ov::element::i32},
            {ov::element::f64,     ov::element::f32},
            {ov::element::f16,     ov::element::f32},
            {ov::element::boolean, ov::element::u8},
            {ov::element::i4,      ov::element::i8},
            {ov::element::u4,      ov::element::u8}
        };

        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
            array.push_back({ov::element::bf16, ov::element::f32});

        return array;
    };
    static const auto precisions = get_convert_precisions();
    type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

    manager.register_pass<ov::pass::AUGRUCellFusion>();
    manager.register_pass<ov::pass::CommonOptimizations>();
    manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
    manager.register_pass<ov::pass::TransposeSinking>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::RNNCellDecomposition>();
    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS9ToNMSIEInternal>();
    manager.register_pass<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>();
    manager.register_pass<ov::pass::ConvertMatrixNmsToMatrixNmsIE>();
    manager.register_pass<ov::pass::TransposeMatMul>();
    manager.register_pass<ov::pass::ConstantFolding>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part2);
        manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
    }
    manager.register_pass<ov::pass::Validate>();
    manager.register_pass<ov::pass::ConvertPrecision>(precisions, type_to_fuse);
    manager.register_pass<ov::pass::EliminateConvert>();
    manager.register_pass<SwapConvertTranspose>();
    manager.register_pass<ConvertToInteraction>();
    manager.register_pass<ConvertInteractionInt8>();

    auto pass_config = manager.get_pass_config();

    // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
    pass_config->set_callback<ov::pass::ConvertSpaceToDepth,
                              ov::pass::ConvertDepthToSpace>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_shape().size() <= 5lu &&
                   node->input_value(0).get_shape().size() == node->get_output_shape(0).size();
        });

    pass_config->set_callback<ov::pass::ConvertBatchToSpace,
                              ov::pass::ConvertSpaceToBatch>(
        [](const_node_ptr &node) -> bool {
            const auto & rank = node->input(0).get_partial_shape().rank().get_length();
            return rank == 4lu || rank == 5lu;
        });

    auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool {
        if (const auto &rnn_cell = std::dynamic_pointer_cast<const ov::opset4::RNNCell>(node)) {
            return rnn_cell->get_clip() == 0.0f;
        } else if (const auto &gru_cell = std::dynamic_pointer_cast<const ov::opset4::GRUCell>(node)) {
            return gru_cell->get_clip() == 0.0f
                && gru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &augru_cell = std::dynamic_pointer_cast<const ov::op::internal::AUGRUCell>(node)) {
            return augru_cell->get_clip() == 0.0f
                && augru_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh"};
        } else if (const auto &lstm_cell = std::dynamic_pointer_cast<const ov::opset4::LSTMCell>(node)) {
            return lstm_cell->get_clip() == 0.0f &&
                   lstm_cell->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast<const ov::opset1::LSTMCell>(node)) {
            return lstm_cell_v1->get_clip() == 0.0f &&
                   lstm_cell_v1->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"};
        }
        return false;
    };

    // Sequences supported by the plugin shouldn't be converted to TensorIterator.
    // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we
    // should always convert to TensorIterator.
    // RNN/GRU/LSTM Sequences are supported with clip == 0 and with default activations.
    auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool {
        const auto& data = node->input(0);
        const auto& data_pshape = data.get_partial_shape();
        // WA: dynamic shapes make it impossible to check seq_len due to shapeOf subgraphs,
        // but the sequence is still supported in CPU and doesn't need to be decomposed
        if (data_pshape.is_dynamic())
            return true;
        if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static())
            return false;
        auto max_seq_len = data.get_shape().at(1);
        if (const auto &rnn_seq = std::dynamic_pointer_cast<const ov::opset6::RNNSequence>(node)) {
            return rnn_seq->get_clip() == 0.0f &&
                   !ov::op::util::is_seq_len_provided(rnn_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &gru_seq = std::dynamic_pointer_cast<const ov::opset6::GRUSequence>(node)) {
            return gru_seq->get_clip() == 0.0f &&
                   gru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ov::op::util::is_seq_len_provided(gru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &augru_seq = std::dynamic_pointer_cast<const ov::op::internal::AUGRUSequence>(node)) {
            return augru_seq->get_clip() == 0.0f &&
                   augru_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh"} &&
                   !ov::op::util::is_seq_len_provided(augru_seq->get_input_node_shared_ptr(2), max_seq_len);
        } else if (const auto &lstm_seq = std::dynamic_pointer_cast<const ov::opset6::LSTMSequence>(node)) {
            return lstm_seq->get_clip() == 0.0f &&
                   lstm_seq->get_activations() == std::vector<std::string>{"sigmoid", "tanh", "tanh"} &&
                   !ov::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), max_seq_len);
        }
        return false;
    };

    pass_config->set_callback<ov::pass::ConvertRNNSequenceToTensorIterator,
                              ov::pass::ConvertGRUSequenceToTensorIterator,
                              ov::pass::ConvertLSTMSequenceToTensorIterator>(
        [isSequencePrimitiveSupported](const_node_ptr &node) -> bool {
            return isSequencePrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::RNNCellDecomposition, ov::pass::GRUCellDecomposition,
                              ov::pass::LSTMCellDecomposition>(
        [isCellPrimitiveSupported](const_node_ptr &node) -> bool {
            return isCellPrimitiveSupported(node);
        });

    pass_config->set_callback<ov::pass::MVN6Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMessage;
            return node::MVN::isSupportedOperation(node, errorMessage);
        });

    pass_config->set_callback<ov::pass::NormalizeL2Decomposition>(
        [](const_node_ptr &node) -> bool {
            std::string errorMsg;
            return node::NormalizeL2::isSupportedOperation(node, errorMsg);
        });

    pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
    pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
        [](const_node_ptr &node) -> bool {
            return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
        });

    if (!isLegacyApi) {
        auto nmsCallback = [](const_node_ptr &node) -> bool {
            for (size_t i = 0; i < node->get_output_size(); i++) {
                const auto outputs = node->get_output_target_inputs(i);
                for (const auto &out : outputs) {
                    if (!ov::op::util::is_output(out.get_node())) {
                        return false;
                    }
                }
            }
            return true;
        };

        pass_config->set_callback<ov::pass::ConvertNMS9ToNMSIEInternal>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMulticlassNmsToMulticlassNmsIE>(nmsCallback);
        pass_config->set_callback<ov::pass::ConvertMatrixNmsToMatrixNmsIE>(nmsCallback);
    }

    // List of enabled/disabled transformations

    // Allow FP16 Converts to be folded and FP16 constants to be upgraded to FP32 data type
    pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
    pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
    pass_config->disable<ov::pass::EyeDecomposition>();

    pass_config->disable<ov::pass::ConvertGELU>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::Gelu7Downgrade>();
    pass_config->disable<ov::pass::HSwishDecomposition>();
    pass_config->disable<ov::pass::ReduceL1Decomposition>();
    pass_config->disable<ov::pass::ReduceL2Decomposition>();
    pass_config->disable<ov::pass::SoftPlusDecomposition>();
    pass_config->disable<ov::pass::HSigmoidDecomposition>();
    pass_config->disable<ov::pass::ConvertMod>();
    pass_config->disable<ov::pass::ConvertShuffleChannels3>();
    pass_config->disable<ov::pass::WeightsDequantizeToFakeQuantize>();
    pass_config->disable<ov::pass::SimplifyCTCGreedyDecoderSeqLen>();
    pass_config->disable<ov::pass::ConvertGather7ToGather1>();
    pass_config->disable<ov::pass::ConvertGather8ToGather7>();
    pass_config->disable<ov::pass::ConvertMinimum>();
    pass_config->disable<ov::pass::ConvertBroadcastToTiles>();
    pass_config->disable<ov::pass::ConvertReduceMeanToPooling>();
    pass_config->disable<ov::pass::ConvertReduceMaxToPooling>();
    pass_config->disable<ov::pass::ConvertReduceSumToPooling>();
    pass_config->disable<ov::pass::SliceToStridedSlice>();
    pass_config->disable<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    pass_config->disable<ov::pass::ConvertROIAlign9To3>();
    pass_config->disable<ov::pass::SoftSignDecomposition>();
    pass_config->disable<ov::pass::UniqueDecomposition>();

    pass_config->enable<ov::pass::NormalizeL2Decomposition>();
    pass_config->enable<ov::pass::ConvertInterpolate1ToInterpolate4>();
    pass_config->enable<ov::pass::ConvertGather1ToGather7>();
    pass_config->enable<ov::pass::ConvertDetectionOutput1ToDetectionOutput8>();
    pass_config->enable<ov::pass::ConvertROIAlign3To9>();

    if (useLpt) {
        CPU_LPT_SCOPE(LowPrecisionTransformations_Part3);
        pass_config->set_callback<ov::pass::AddFakeQuantizeFusion,
                                  ov::pass::MulFakeQuantizeFusion,
                                  ov::pass::FakeQuantizeMulFusion>(
            [](const_node_ptr &node) -> bool {
                std::string errMsg;
                return !node::FakeQuantize::isSupportedOperation(node, errMsg);
            });

        pass_config->set_callback<ov::pass::ConvertQuantizeDequantize>([&defaultPrecisions](const_node_ptr &node) -> bool {
            return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node, defaultPrecisions);
        });
    }

    manager.run_passes(model);
}

void Transformations::Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt);

    using namespace ngraph::pass::low_precision;
    CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
    // Only enable conv/group conv signed input on AMX platforms.
    std::vector<ov::element::Type> input0LowPrecisionList;
    if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx)) {
        input0LowPrecisionList = {ov::element::u8, ov::element::i8};
    } else {
        input0LowPrecisionList = {ov::element::u8};
    }
    auto supportedPrecisions = std::vector<PrecisionsRestriction>({
        PrecisionsRestriction::create<ov::opset1::Convolution>({
            {{0}, input0LowPrecisionList},
            {{1}, {ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset1::ConvolutionBackpropData>({
            {{0}, {ov::element::u8, ov::element::i8}},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset1::GroupConvolution>({
            {{0}, input0LowPrecisionList},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset1::Multiply>({
            {{0}, {ov::element::u8}},
            {{1}, {ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset1::MatMul>({
            {{0}, {ov::element::u8, ov::element::i8}},
            {{1}, {ov::element::i8}}
        }),
        PrecisionsRestriction::create<ov::opset5::LSTMSequence>({
            {{0, 1}, {ov::element::u8, ov::element::i8}},
        }),
        PrecisionsRestriction::create<ov::opset6::GRUSequence>({
            {{0, 1}, {ov::element::u8, ov::element::i8}},
        }),
    });

    auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
        QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0}),
        QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})
    });

    // for GNA networks reference execution
    bool updatePrecision = true;
    if (hasINT16orINT32Levels) {
        updatePrecision = false;
        supportedPrecisions = std::vector<PrecisionsRestriction>({});
    }

    ov::pass::Manager lptManager;
    lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
        supportedPrecisions,
        quantizationRestrictions,
        LayerTransformation::Params(updatePrecision, ov::element::f32, defaultPrecisions));
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
        if (const auto multiply = std::dynamic_pointer_cast<const ov::opset1::Multiply>(node)) {
            return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
        }
        return false;
    });
    lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::ConvolutionBackpropDataTransformation>(
        [&defaultPrecisions](const_node_ptr& node) -> bool {
            return LayerTransformation::isAsymmetricQuantization(node, defaultPrecisions) ||
                   WeightableLayerTransformation::isAsymmetricOnWeights(node, defaultPrecisions);
        });

    lptManager.get_pass_config()->disable<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>();

    lptManager.run_passes(model);
}

void Transformations::PostLpt() {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, PostLpt);

    ov::pass::Manager postLPTPassManager;
    postLPTPassManager.register_pass<ov::pass::UnrollTensorIterator>();
    postLPTPassManager.register_pass<ov::pass::ReshapePRelu>();
    postLPTPassManager.get_pass_config()->set_callback<ov::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
        // UnrollTI transformation is disabled by default and is turned on by the LowLatency transformation
        return node->get_rt_info().count("UNROLL_TI") == 0;
    });
    postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
    postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ov::Node>& node) -> bool {
        if (node->get_input_size() >= 2) {
            return node->get_input_element_type(1) == ov::element::i8 || node->get_input_element_type(1) == ov::element::u8;
        }
        return false;
    });

    postLPTPassManager.register_pass<ov::pass::ConstantFolding>();

    // Snippets may break MHA patterns, so the fusion has to be performed before them
    postLPTPassManager.register_pass<MHAFusion>();
    postLPTPassManager.register_pass<FuseFQtoInteraction>();
    postLPTPassManager.get_pass_config()->set_callback<MHAFloatFusion, MHAFloatFusion2,
                                                       MHAQuantFusion, MHAQuantFusion2>
        ([this](const std::shared_ptr<const ov::Node>& n) -> bool {
            std::string errorMessage;

            if (!node::MHA::isSupportedOperation(n, errorMessage))
                return true;

            // Implementation calls AMX BF16 brgemm only for tensors with K and N aligned on 2; otherwise it falls back to the vector impl.
            // The vector madd BF16 instruction on SPR has reduced performance at the HW level, which results in overall perf degradation.
            size_t bf16Factor = 2;
            if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16_amx_bf16) &&
                (n->get_input_element_type(0) == element::bf16 || (n->get_input_element_type(0) == element::f32 && enableBF16)) &&
                (n->get_input_shape(0)[3] % bf16Factor != 0 || n->get_input_shape(1)[1] % bf16Factor != 0 || n->get_input_shape(3)[3] % bf16Factor != 0)) {
                return true;
            }

            return false;
        });

    // Execute before snippets. Otherwise FQ will be converted to Subgraph
    postLPTPassManager.register_pass<ConvertFqRnnToQuantizedRnn>();
    postLPTPassManager.run_passes(model);
}

void Transformations::MainSnippets(void) {
    if (!enableSnippets ||
        !dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) // snippets are implemented only for relevant platforms (avx2+ extensions)
        return;

    ov::pass::Manager snippetsManager;
    snippetsManager.register_pass<SnippetsMarkSkipped>();
    snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
    snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
    snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
        [](const std::shared_ptr<const ov::Node>& n) -> bool {
            // CPU Plugin supports Swish in Subgraph via conversion to SwishCPU, which assumes the second input to be constant
            if (ov::is_type<const ov::op::v4::Swish>(n)) {
                if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
                    return true;
            }

            const auto& inputs = n->inputs();
            // todo: clarify whether we can evaluate snippets on const paths
            const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
                [](const ov::Input<const ov::Node> &in) {
                    return ov::is_type<ov::op::v0::Constant>(in.get_source_output().get_node_shared_ptr());
                });
            // todo: clarify whether we can evaluate snippets on inputs with larger ranks
            auto rank_is_too_large = [](const ov::descriptor::Tensor& t) {
                // the callback is called after has_supported_in_out(), so it's safe to assume that the shapes are static
                return t.get_partial_shape().rank().get_length() > 6;
            };
            const bool bad_input_rank = std::any_of(inputs.begin(), inputs.end(),
                [&](const ov::Input<const ov::Node>& in) { return rank_is_too_large(in.get_tensor()); });
            const auto& outputs = n->outputs();
            const bool bad_output_rank = std::any_of(outputs.begin(), outputs.end(),
                [&](const ov::Output<const ov::Node>& out) { return rank_is_too_large(out.get_tensor()); });
            return has_only_const_inputs || bad_input_rank || bad_output_rank;
        });
    snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
    snippetsManager.run_passes(model);
}

void Transformations::PostSnippets(void) {
    ov::pass::Manager postSnippetsManager;
    postSnippetsManager.register_pass<ov::pass::FakeQuantizeDecomposition>();
    postSnippetsManager.get_pass_config()->set_callback<ov::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
        std::string errMsg;
        return node::FakeQuantize::isSupportedOperation(node, errMsg);
    });
    postSnippetsManager.register_pass<ov::pass::ConstantFolding>();
    postSnippetsManager.run_passes(model);
}

void Transformations::Snippets(void) {
    CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Snippets);

    MainSnippets();
    PostSnippets();
}

} // namespace intel_cpu
} // namespace ov
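
For orientation, this is how the plugin drives the new pipeline class; a condensed sketch assembled from the plugin.cpp hunks earlier in this commit, not an additional change:

    // Condensed usage sketch (arguments are the plugin's own, as shown in the LoadExeNetworkImpl hunk)
    Transformations transformations(nGraphFunc, enableLPT, enableSnippets, enableBF16, isLegacyAPI(), engConfig);
    transformations.UpToCpuSpecificOpSet();  // PreLpt -> (Lpt) -> PostLpt -> (Snippets)
    // ... output shape checks and ApplyPerformanceHints() happen in between ...
    transformations.CpuSpecificOpSet();      // ConvertToCPUSpecificOpset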
src/plugins/intel_cpu/src/transformation_pipeline.h (new file, 65 lines)
@ -0,0 +1,65 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/model.hpp"
#include "low_precision/low_precision.hpp"
#include "config.h"

#include "itt.h"

#include <memory>
#include <vector>

using namespace InferenceEngine;

#define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: "

namespace ov {
namespace intel_cpu {

class Transformations {
public:
    Transformations(const std::shared_ptr<ov::Model>& initialModel,
                    const bool enableLpt,
                    const bool enableSnippets,
                    const bool enableBF16,
                    const bool isLegacyApi,
                    const Config& config)
        : model(initialModel),
          enableLpt(enableLpt),
          enableSnippets(enableSnippets),
          enableBF16(enableBF16),
          isLegacyApi(isLegacyApi),
          config(config) {}

    void UpToCpuSpecificOpSet();
    void CpuSpecificOpSet(void);

private:
    std::shared_ptr<ov::Model> model;
    const bool enableLpt;
    const bool enableSnippets;
    const bool enableBF16;
    const bool isLegacyApi;
    const Config& config;

    void PreLpt(const std::vector<ov::element::Type>& defaultPrecisions, const bool isLegacyApi);

    void Lpt(const bool hasINT16orINT32Levels, const std::vector<ov::element::Type>& defaultPrecisions);

    void PostLpt();

    void MainSnippets(void);

    void PostSnippets(void);

    void Snippets(void);

    static bool fuse_type_to_convert(const std::shared_ptr<ngraph::Node>& node, ov::element::Type to, size_t idx);
};

} // namespace intel_cpu
} // namespace ov

@ -5,7 +5,7 @@

#ifdef CPU_DEBUG_CAPS

#define CPU_DEBUG_CAP_ENABLE(_x) _x;
#define CPU_DEBUG_CAP_ENABLE(...) __VA_ARGS__
#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) true

#include <string>

@ -147,7 +147,7 @@ static inline std::ostream& write_all_to_stream(std::ostream& os, const T& arg,

#else // !CPU_DEBUG_CAPS

#define CPU_DEBUG_CAP_ENABLE(_x)
#define CPU_DEBUG_CAP_ENABLE(...)
#define CPU_DEBUG_CAPS_ALWAYS_TRUE(x) x

#define DEBUG_LOG(...)
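
One detail worth calling out: switching CPU_DEBUG_CAP_ENABLE from a single named parameter to __VA_ARGS__ lets the wrapped statement contain commas that are not protected by parentheses, and the call site now supplies its own trailing semicolon since the expansion no longer appends one. A hypothetical illustration, not taken from the patch:

    // With the old single-parameter form this would fail to preprocess,
    // because the unprotected comma splits the argument in two:
    CPU_DEBUG_CAP_ENABLE(std::map<int, std::string> debugNames;)
    // The variadic form forwards everything between the outer parentheses as-is.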
src/plugins/intel_cpu/src/utils/debug_caps_config.cpp (new file, 66 lines)
@ -0,0 +1,66 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#ifdef CPU_DEBUG_CAPS

#include "debug_caps_config.h"

#include <string>

namespace ov {
namespace intel_cpu {

void DebugCapsConfig::readProperties() {
    auto readEnv = [](const char* envVar) {
        return std::getenv(envVar);
    };

    auto parseDumpFormat = [](const std::string& format) {
        if (format == "BIN")
            return FORMAT::BIN;
        else if (format == "TEXT")
            return FORMAT::TEXT;
        else
            IE_THROW() << "readDebugCapsProperties: Unknown dump format";
    };

    const char* envVarValue = nullptr;

    if ((envVarValue = readEnv("OV_CPU_EXEC_GRAPH_PATH")))
        execGraphPath = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_VERBOSE")))
        verbose = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_DIR")))
        blobDumpDir = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_FORMAT")))
        blobDumpFormat = parseDumpFormat(envVarValue);

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_EXEC_ID")))
        blobDumpFilters[BY_EXEC_ID] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_PORTS")))
        blobDumpFilters[BY_PORTS] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_TYPE")))
        blobDumpFilters[BY_TYPE] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_BLOB_DUMP_NODE_NAME")))
        blobDumpFilters[BY_NAME] = envVarValue;

    if ((envVarValue = readEnv("OV_CPU_SUMMARY_PERF"))) {
        summaryPerf = envVarValue;
    }

    if ((envVarValue = readEnv("OV_CPU_DISABLE")))
        disable.parseAndSet(envVarValue);

    if ((envVarValue = readEnv("OV_CPU_DUMP_IR")))
        dumpIR.parseAndSet(envVarValue);
}

} // namespace intel_cpu
} // namespace ov

#endif // CPU_DEBUG_CAPS
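
To make the knobs above concrete, here is a minimal, hypothetical way to exercise them from a test program. The environment variable names come from readProperties(); the option strings, the use of setenv() (POSIX), and the test program itself are assumptions for illustration only:

    #include <cstdlib>
    #include "utils/debug_caps_config.h"

    int main() {
        // Dump IRs around the LPT and Snippets stages in XML form into ./cpu_ir_dump (illustrative values)
        setenv("OV_CPU_DUMP_IR", "dir=cpu_ir_dump formats=xml transformations=lpt,snippets", 1);
        // Skip the CPU-specific opset conversion stage entirely
        setenv("OV_CPU_DISABLE", "transformations=specific", 1);

        ov::intel_cpu::DebugCapsConfig debugCaps;  // the constructor calls readProperties()
        return 0;
    }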
src/plugins/intel_cpu/src/utils/debug_caps_config.h (new file, 213 lines)
@ -0,0 +1,213 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS

#include "ie_common.h"
#include "openvino/util/common_util.hpp"

#include <bitset>
#include <unordered_map>

namespace ov {
namespace intel_cpu {

class DebugCapsConfig {
private:
    struct PropertySetter;
    using PropertySetterPtr = std::shared_ptr<PropertySetter>;

public:
    DebugCapsConfig() {
        readProperties();
    }

    enum FILTER {
        BY_PORTS,
        BY_EXEC_ID,
        BY_TYPE,
        BY_NAME,
    };

    enum class FORMAT {
        BIN,
        TEXT,
    };

    std::string execGraphPath;
    std::string verbose;
    std::string blobDumpDir = "cpu_dump";
    FORMAT blobDumpFormat = FORMAT::TEXT;
    // std::hash<int> is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
    std::unordered_map<FILTER, std::string, std::hash<int>> blobDumpFilters;
    std::string summaryPerf = "";

    struct TransformationFilter {
        enum Type : uint8_t {
            PreLpt = 0, Lpt, PostLpt, Snippets, Specific, NumOfTypes
        };
        std::bitset<NumOfTypes> filter;

        PropertySetterPtr getPropertySetter() {
            return PropertySetterPtr(new BitsetFilterPropertySetter<NumOfTypes>("transformations", filter,
                {{"all", {PreLpt, Lpt, PostLpt, Snippets, Specific}},
                 {"common", {PreLpt, PostLpt}},
                 {"prelpt", {PreLpt}},
                 {"lpt", {Lpt}},
                 {"postlpt", {PostLpt}},
                 {"snippets", {Snippets}},
                 {"specific", {Specific}}
                }));
        }
    };
    struct IrFormatFilter {
        enum Type : uint8_t {
            Xml = 0, XmlBin, Dot, Svg, NumOfTypes
        };
        std::bitset<NumOfTypes> filter;

        PropertySetterPtr getPropertySetter() {
            return PropertySetterPtr(new BitsetFilterPropertySetter<NumOfTypes>("formats", filter,
                {{"all", {XmlBin, Dot, Svg}},
                 {"xml", {Xml}},
                 {"xmlbin", {XmlBin}},
                 {"dot", {Dot}},
                 {"svg", {Svg}},
                }));
        }
    };

    struct PropertyGroup {
        virtual std::vector<PropertySetterPtr> getPropertySetters(void) = 0;

        void parseAndSet(const std::string& str) {
            const auto& options = ov::util::split(str, ' ');
            const auto& propertySetters = getPropertySetters();
            bool failed = false;
            auto getHelp = [propertySetters] (void) {
                std::string help;
                for (const auto& property : propertySetters)
                    help.append('\t' + property->getPropertyName() + "=<" + property->getPropertyValueDescription() + ">\n");
                return help;
            };

            for (const auto& option : options) {
                const auto& parts = ov::util::split(option, '=');
                if (parts.size() > 2) {
                    failed = true;
                    break;
                }
                const auto& propertyName = ov::util::to_lower(parts.front());
                const auto& foundSetter = std::find_if(propertySetters.begin(), propertySetters.end(),
                    [propertyName] (const PropertySetterPtr& setter) { return setter->getPropertyName() == propertyName; });
                if (foundSetter == propertySetters.end() ||
                    !(*foundSetter)->parseAndSet(parts.size() == 1 ? "" : parts.back())) {
                    failed = true;
                    break;
                }
            }

            if (failed)
                IE_THROW() << "Wrong syntax: " << str << std::endl
                           << "The following space separated options are supported (option names are case insensitive):" << std::endl
                           << getHelp();
        }
    };

    struct : PropertyGroup {
        TransformationFilter transformations;

        std::vector<PropertySetterPtr> getPropertySetters(void) override {
            return { transformations.getPropertySetter() };
        }
    } disable;

    struct : PropertyGroup {
        std::string dir = "intel_cpu_dump";
        IrFormatFilter format = { 1 << IrFormatFilter::Xml };
        TransformationFilter transformations;

        std::vector<PropertySetterPtr> getPropertySetters(void) override {
            return { PropertySetterPtr(new StringPropertySetter("dir", dir, "path to dumped IRs")),
                     format.getPropertySetter(),
                     transformations.getPropertySetter() };
        }
    } dumpIR;

private:
    struct PropertySetter {
        virtual bool parseAndSet(const std::string& str) = 0;
        virtual std::string getPropertyValueDescription(void) const = 0;

        PropertySetter(const std::string&& name) : propertyName(name) {}
        const std::string& getPropertyName(void) const { return propertyName; }

    private:
        const std::string propertyName;
    };

    struct StringPropertySetter : PropertySetter {
        StringPropertySetter(const std::string&& name, std::string& ref, const std::string&& valueDescription)
            : property(ref), propertyValueDescription(valueDescription), PropertySetter(std::move(name)) {}
        bool parseAndSet(const std::string& str) override {
            property = str;
            return true;
        }
        std::string getPropertyValueDescription(void) const override { return propertyValueDescription; }

    private:
        std::string& property;
        const std::string propertyValueDescription;
    };

    template<std::size_t NumOfBits>
    struct BitsetFilterPropertySetter : PropertySetter {
        struct Token {
            std::string name;
            std::vector<size_t> bits;
        };

        BitsetFilterPropertySetter(const std::string&& name, std::bitset<NumOfBits>& ref, const std::vector<Token>&& tokens)
            : property(ref), propertyTokens(tokens), PropertySetter(std::move(name)) {}
        bool parseAndSet(const std::string& str) override {
            const auto& tokens = str.empty() ?
                std::vector<std::string>{"all"} : ov::util::split(ov::util::to_lower(str), ',');
            property.reset();
            for (const auto& token : tokens) {
                const bool tokenVal = (token.front() != '-');
                const auto& tokenName = tokenVal ? token : token.substr(1);
                const auto& foundToken = std::find_if(propertyTokens.begin(), propertyTokens.end(),
                    [tokenName] (const Token& token) { return token.name == tokenName; });
                if (foundToken == propertyTokens.end())
                    return false;

                for (const auto& bit : foundToken->bits) {
                    property.set(bit, tokenVal);
                }
            }
            return true;
        }
        std::string getPropertyValueDescription(void) const override {
            std::string supportedTokens = "comma separated filter tokens: ";
            for (auto i = 0; i < propertyTokens.size(); i++) {
                if (i)
                    supportedTokens.push_back(',');
                supportedTokens.append(propertyTokens[i].name);
            }
            supportedTokens.append("; -'token' is used for exclusion, case does not matter, no tokens is treated as 'all'");
            return supportedTokens;
        }

    private:
        std::bitset<NumOfBits>& property;
        const std::vector<Token> propertyTokens;
    };

    void readProperties();
};

} // namespace intel_cpu
} // namespace ov

#endif // CPU_DEBUG_CAPS
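
A quick illustration of the option grammar this class accepts; a sketch under the assumption that it is called from test code, using only the token names registered above:

    #include <cassert>
    #include "utils/debug_caps_config.h"

    void example(ov::intel_cpu::DebugCapsConfig& cfg) {
        // space separated name=value options; comma separated tokens; a leading '-' excludes a token
        cfg.dumpIR.parseAndSet("dir=./ir_dump formats=xml,svg transformations=all,-prelpt");

        using TF = ov::intel_cpu::DebugCapsConfig::TransformationFilter;
        using IF = ov::intel_cpu::DebugCapsConfig::IrFormatFilter;
        assert(cfg.dumpIR.format.filter[IF::Svg]);              // "svg" token set the Svg bit
        assert(!cfg.dumpIR.transformations.filter[TF::PreLpt]); // "-prelpt" cleared the PreLpt bit
    }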
src/plugins/intel_cpu/src/utils/ngraph_transformation.hpp (new file, 113 lines)
@ -0,0 +1,113 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#ifdef CPU_DEBUG_CAPS

#include "debug_caps_config.h"
#include "openvino/util/file_util.hpp"
#include <openvino/pass/manager.hpp>
#include <openvino/pass/serialize.hpp>
#include <openvino/pass/visualize_tree.hpp>

namespace ov {
namespace intel_cpu {

class TransformationDumper {
public:
    explicit TransformationDumper(const DebugCapsConfig& config, const DebugCapsConfig::TransformationFilter::Type type,
                                  const std::shared_ptr<ov::Model>& model)
        : config(config), type(type), model(model) {
        for (auto prev = infoMap.at(type).prev; prev != TransformationType::NumOfTypes;
             prev = infoMap.at(prev).prev) {
            // no need to serialize the input graph if there were no transformations since the previous dump
            if (config.disable.transformations.filter[prev])
                continue;
            if (!config.dumpIR.transformations.filter[prev])
                break;
            if (wasDumped()[prev])
                return;
        }
        dump("_in");
    }
    ~TransformationDumper() {
        dump("_out");
        wasDumped().set(type);
    }

private:
    const DebugCapsConfig& config;
    const std::shared_ptr<ov::Model>& model;
    using TransformationType = DebugCapsConfig::TransformationFilter::Type;
    const TransformationType type;

    struct TransformationInfo {
        std::string name;
        TransformationType prev;
    };
    // std::hash<std::underlying_type<FILTER>::type> is necessary for Ubuntu-16.04 (gcc-5.4 and a defect in the C++11 standard)
    const std::unordered_map<TransformationType, TransformationInfo,
                             std::hash<std::underlying_type<TransformationType>::type>> infoMap =
        {{TransformationType::PreLpt,   {"preLpt",      TransformationType::NumOfTypes}},
         {TransformationType::Lpt,      {"lpt",         TransformationType::PreLpt}},
         {TransformationType::PostLpt,  {"postLpt",     TransformationType::Lpt}},
         {TransformationType::Snippets, {"snippets",    TransformationType::PostLpt}},
         {TransformationType::Specific, {"cpuSpecific", TransformationType::Snippets}}};
    std::bitset<TransformationType::NumOfTypes>& wasDumped(void) {
        static std::bitset<TransformationType::NumOfTypes> wasDumped;
        return wasDumped;
    }
    void dump(const std::string&& postfix) {
        static int num = 0; // just to keep dumped IRs ordered in the filesystem
        const auto pathAndName = config.dumpIR.dir + "/ir_" + std::to_string(num) + '_' +
                                 infoMap.at(type).name + postfix;

        ov::util::create_directory_recursive(config.dumpIR.dir);

        ov::pass::Manager serializer;

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::XmlBin])
            serializer.register_pass<ov::pass::Serialize>(pathAndName + ".xml", "");

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Xml]) {
            std::string xmlFile(pathAndName + ".xml");
            std::string binFile("/dev/null"); // @todo make it crossplatform using a dummy implementation of std::ostream

            serializer.register_pass<ov::pass::Serialize>(xmlFile, binFile);
        }

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Svg]) {
            serializer.register_pass<ov::pass::VisualizeTree>(pathAndName + ".svg");
        }

        if (config.dumpIR.format.filter[DebugCapsConfig::IrFormatFilter::Dot]) {
            serializer.register_pass<ov::pass::VisualizeTree>(pathAndName + ".dot");
        }

        serializer.run_passes(model);
        num++;
    }
};

} // namespace intel_cpu
} // namespace ov

# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) \
    _config.disable.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type]
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) !CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(__VA_ARGS__)
# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type) \
    IE_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(_this->config.debugCaps, _type)); \
    auto dumperPtr = _this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::_type] ? \
        std::unique_ptr<TransformationDumper>(new TransformationDumper(_this->config.debugCaps, \
            DebugCapsConfig::TransformationFilter::Type::_type, _this->model)) : \
        nullptr
# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type) \
    if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_this->config.debugCaps, _type)) \
        return; \
    CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
#else
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(_config, _type) false
# define CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(...) true
# define CPU_DEBUG_CAP_TRANSFORMATION_DUMP(_this, _type)
# define CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(_this, _type)
#endif // CPU_DEBUG_CAPS
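
For readability, this is roughly what CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Lpt) unrolls to inside a pipeline stage; an illustrative hand-written expansion of the macros above, with line breaks added:

    if (CPU_DEBUG_CAP_IS_TRANSFORMATION_DISABLED(this->config.debugCaps, Lpt))
        return;
    IE_ASSERT(CPU_DEBUG_CAP_IS_TRANSFORMATION_ENABLED(this->config.debugCaps, Lpt));
    auto dumperPtr = this->config.debugCaps.dumpIR.transformations.filter[DebugCapsConfig::TransformationFilter::Type::Lpt] ?
        std::unique_ptr<TransformationDumper>(new TransformationDumper(this->config.debugCaps,
            DebugCapsConfig::TransformationFilter::Type::Lpt, this->model)) :
        nullptr;  // if created, the dumper serializes the model now ("_in") and again when it leaves scope ("_out")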
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#include "node_dumper.h"
|
#include "node_dumper.h"
|
||||||
|
|
||||||
|
#include "utils/debug_caps_config.h"
|
||||||
#include <node.h>
|
#include <node.h>
|
||||||
#include "ie_common.h"
|
#include "ie_common.h"
|
||||||
#include "utils/blob_dump.h"
|
#include "utils/blob_dump.h"
|
||||||
@ -26,20 +27,20 @@ static void formatNodeName(std::string& name) {
|
|||||||
std::replace(name.begin(), name.end(), ':', '-');
|
std::replace(name.begin(), name.end(), ':', '-');
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool shouldBeDumped(const NodePtr& node, const Config& config, const std::string& portsKind) {
|
static bool shouldBeDumped(const NodePtr& node, const DebugCapsConfig& config, const std::string& portsKind) {
|
||||||
const auto& dumpFilters = config.blobDumpFilters;
|
const auto& dumpFilters = config.blobDumpFilters;
|
||||||
|
|
||||||
if (dumpFilters.empty())
|
if (dumpFilters.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (dumpFilters.count(Config::FILTER::BY_PORTS)) { // filter by ports configured
|
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_PORTS)) { // filter by ports configured
|
||||||
if (dumpFilters.at(Config::FILTER::BY_PORTS) != "ALL" &&
|
if (dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS) != "ALL" &&
|
||||||
portsKind != dumpFilters.at(Config::FILTER::BY_PORTS))
|
portsKind != dumpFilters.at(DebugCapsConfig::FILTER::BY_PORTS))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dumpFilters.count(Config::FILTER::BY_EXEC_ID)) { // filter by exec id configured
|
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_EXEC_ID)) { // filter by exec id configured
|
||||||
std::stringstream ss(dumpFilters.at(Config::FILTER::BY_EXEC_ID));
|
std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_EXEC_ID));
|
||||||
int id;
|
int id;
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
|
|
||||||
@ -54,8 +55,8 @@ static bool shouldBeDumped(const NodePtr& node, const Config& config, const std:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dumpFilters.count(Config::FILTER::BY_TYPE)) { // filter by type configured
|
if (dumpFilters.count(DebugCapsConfig::FILTER::BY_TYPE)) { // filter by type configured
|
||||||
std::stringstream ss(dumpFilters.at(Config::FILTER::BY_TYPE));
|
std::stringstream ss(dumpFilters.at(DebugCapsConfig::FILTER::BY_TYPE));
|
||||||
std::string type;
|
std::string type;
|
||||||
bool matched = false;
|
bool matched = false;
|
||||||
|
|
||||||
@ -70,22 +71,22 @@ static bool shouldBeDumped(const NodePtr& node, const Config& config, const std:
             return false;
     }
 
-    if (dumpFilters.count(Config::FILTER::BY_NAME)) { // filter by name configured
-        if (dumpFilters.at(Config::FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
-            !std::regex_match(node->getName(), std::regex(dumpFilters.at(Config::FILTER::BY_NAME)))) // name does not match
+    if (dumpFilters.count(DebugCapsConfig::FILTER::BY_NAME)) { // filter by name configured
+        if (dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME) != "*" && // to have 'single char' option for matching all the names
+            !std::regex_match(node->getName(), std::regex(dumpFilters.at(DebugCapsConfig::FILTER::BY_NAME)))) // name does not match
             return false;
     }
 
     return true;
 }
 
-static void dump(const BlobDumper& bd, const std::string& file, const Config& config) {
+static void dump(const BlobDumper& bd, const std::string& file, const DebugCapsConfig& config) {
     switch (config.blobDumpFormat) {
-    case Config::FORMAT::BIN: {
+    case DebugCapsConfig::FORMAT::BIN: {
         bd.dump(file);
         break;
     }
-    case Config::FORMAT::TEXT: {
+    case DebugCapsConfig::FORMAT::TEXT: {
         bd.dumpAsTxt(file);
         break;
     }
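
A hedged illustration of a filter set that shouldBeDumped() would accept; the container type behind `blobDumpFilters` is assumed here, while the `FILTER` keys, the "ALL"/"*" wildcards, the comma-separated lists and the regex matching follow the checks above:

// Hypothetical configuration; only the key names and value semantics come from the code above.
DebugCapsConfig debugCaps;
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_PORTS]   = "OUT";          // dump output ports only ("ALL" keeps both directions)
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_EXEC_ID] = "12,27";        // comma-separated execution ids
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_TYPE]    = "Convolution";  // comma-separated node types
debugCaps.blobDumpFilters[DebugCapsConfig::FILTER::BY_NAME]    = ".*fc.*";       // std::regex over the node name; "*" matches everything
debugCaps.blobDumpFormat = DebugCapsConfig::FORMAT::TEXT;                        // dump() then calls dumpAsTxt() instead of the binary writer
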
@ -94,7 +95,7 @@ static void dump(const BlobDumper& bd, const std::string& file, const Config& co
     }
 }
 
-static void dumpInternalBlobs(const NodePtr& node, const Config& config) {
+static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config) {
     std::string nodeName = node->getName();
     formatNodeName(nodeName);
 
@ -116,7 +117,7 @@ static void dumpInternalBlobs(const NodePtr& node, const Config& config) {
     }
 }
 
-void dumpInputBlobs(const NodePtr& node, const Config& config, int count) {
+void dumpInputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) {
     if (!shouldBeDumped(node, config, "IN"))
         return;
 
@ -150,7 +151,7 @@ void dumpInputBlobs(const NodePtr& node, const Config& config, int count) {
     dumpInternalBlobs(node, config);
 }
 
-void dumpOutputBlobs(const NodePtr& node, const Config& config, int count) {
+void dumpOutputBlobs(const NodePtr& node, const DebugCapsConfig& config, int count) {
     if (!shouldBeDumped(node, config, "OUT"))
         return;
 
@ -1,25 +1,26 @@
 // Copyright (C) 2018-2022 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-#ifdef CPU_DEBUG_CAPS
 #pragma once
 
+#ifdef CPU_DEBUG_CAPS
+#include "utils/debug_caps_config.h"
 #include <node.h>
-#include "config.h"
 
 namespace ov {
 namespace intel_cpu {
 
-void dumpInputBlobs(const NodePtr &node, const Config& config, int count = -1);
-void dumpOutputBlobs(const NodePtr &node, const Config& config, int count = -1);
+void dumpInputBlobs(const NodePtr &node, const DebugCapsConfig& config, int count = -1);
+void dumpOutputBlobs(const NodePtr &node, const DebugCapsConfig& config, int count = -1);
 
 class DumpHelper {
     const NodePtr& node;
     const int count;
-    const Config& config;
+    const DebugCapsConfig& config;
 
 public:
-    explicit DumpHelper(const NodePtr& _node, const Config& _config, int _count = -1): node(_node), config(_config), count(_count) {
+    explicit DumpHelper(const NodePtr& _node, const DebugCapsConfig& _config, int _count = -1):
+        node(_node), config(_config), count(_count) {
         dumpInputBlobs(node, config, count);
     }
 
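
A minimal sketch of how the helper might wrap a node's execution; only the constructor's behavior is visible in this hunk, so the execution call and any output-side dumping are assumptions:

// Hypothetical wrapper around node execution.
void executeWithDumps(const NodePtr& node, const DebugCapsConfig& debugCaps, int inferCount) {
    DumpHelper dumper(node, debugCaps, inferCount);  // constructor dumps the node's input blobs, subject to the filters
    // node->execute(...);                           // the node would run here (signature not shown in this excerpt)
}   // output dumping on scope exit would be the helper's remaining responsibility (outside this excerpt)
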