From 6e490c24e28f0fe9f735c58de1a9d139b94759a4 Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Wed, 10 Mar 2021 14:15:38 +0300 Subject: [PATCH] =?UTF-8?q?[=C2=A7]=20introduces=20snippets=20generator=20?= =?UTF-8?q?(#4349)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inference-engine/src/CMakeLists.txt | 4 +- .../src/inference_engine/CMakeLists.txt | 4 +- .../src/legacy_api/CMakeLists.txt | 5 +- .../src/convert_function_to_cnn_network.cpp | 10 + inference-engine/src/snippets/CMakeLists.txt | 56 ++ .../snippets/include/snippets/generator.hpp | 123 +++++ .../include/snippets/op/blockedload.hpp | 36 ++ .../include/snippets/op/blockedparameter.hpp | 38 ++ .../include/snippets/op/broadcastload.hpp | 48 ++ .../include/snippets/op/broadcastmove.hpp | 41 ++ .../src/snippets/include/snippets/op/load.hpp | 42 ++ .../src/snippets/include/snippets/op/nop.hpp | 30 + .../snippets/include/snippets/op/scalar.hpp | 48 ++ .../include/snippets/op/scalarload.hpp | 36 ++ .../include/snippets/op/scalarstore.hpp | 36 ++ .../include/snippets/op/staticpower.hpp | 44 ++ .../snippets/include/snippets/op/store.hpp | 38 ++ .../snippets/include/snippets/op/subgraph.hpp | 101 ++++ .../include/snippets/op/vectorload.hpp | 36 ++ .../include/snippets/op/vectorstore.hpp | 36 ++ .../snippets/pass/assign_registers.hpp | 30 + .../snippets/pass/collapse_subgraph.hpp | 74 +++ .../snippets/pass/insert_load_store.hpp | 41 ++ .../snippets/pass/insert_movebroadcast.hpp | 29 + .../load_movebroadcast_to_broadcastload.hpp | 29 + .../snippets/pass/vector_to_scalar.hpp | 42 ++ .../include/snippets/register_info.hpp | 24 + .../include/snippets/snippets_isa.hpp | 32 ++ .../include/snippets/snippets_isa_tbl.hpp | 84 +++ .../src/snippets/src/generator.cpp | 30 + inference-engine/src/snippets/src/itt.hpp | 71 +++ .../src/snippets/src/op/blockedload.cpp | 12 + .../src/snippets/src/op/blockedparameter.cpp | 9 + .../src/snippets/src/op/broadcastload.cpp | 35 ++ 
.../src/snippets/src/op/broadcastmove.cpp | 68 +++ inference-engine/src/snippets/src/op/load.cpp | 48 ++ inference-engine/src/snippets/src/op/nop.cpp | 18 + .../src/snippets/src/op/scalar.cpp | 9 + .../src/snippets/src/op/scalarload.cpp | 12 + .../src/snippets/src/op/scalarstore.cpp | 12 + .../src/snippets/src/op/staticpower.cpp | 9 + .../src/snippets/src/op/store.cpp | 48 ++ .../src/snippets/src/op/subgraph.cpp | 344 ++++++++++++ .../src/snippets/src/op/vectorload.cpp | 12 + .../src/snippets/src/op/vectorstore.cpp | 12 + .../snippets/src/pass/assign_registers.cpp | 183 +++++++ .../snippets/src/pass/collapse_subgraph.cpp | 516 ++++++++++++++++++ .../snippets/src/pass/insert_load_store.cpp | 67 +++ .../src/pass/insert_movebroadcast.cpp | 177 ++++++ .../load_movebroadcast_to_broadcastload.cpp | 59 ++ .../snippets/src/pass/vector_to_scalar.cpp | 40 ++ .../src/snippets/src/register_info.cpp | 9 + inference-engine/src/snippets/src/remarks.hpp | 20 + .../inference_engine/CMakeLists.txt | 1 + .../snippets/broadcast_fusion.cpp | 135 +++++ .../inference_engine/snippets/memory_ops.cpp | 94 ++++ .../snippets/movebroadcast.cpp | 44 ++ .../inference_engine/snippets/registers.cpp | 137 +++++ .../snippets/tokenization.cpp | 154 ++++++ .../snippets/vector_scalar.cpp | 72 +++ .../tests_deprecated/unit/CMakeLists.txt | 4 +- ngraph/core/src/op/non_zero.cpp | 2 +- ngraph/core/src/op/one_hot.cpp | 2 +- ngraph/core/src/op/util/op_types.cpp | 2 + 64 files changed, 3657 insertions(+), 7 deletions(-) create mode 100644 inference-engine/src/snippets/CMakeLists.txt create mode 100644 inference-engine/src/snippets/include/snippets/generator.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/blockedload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/broadcastload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp 
create mode 100644 inference-engine/src/snippets/include/snippets/op/load.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/nop.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/scalar.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/scalarload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/scalarstore.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/staticpower.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/store.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/subgraph.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/vectorload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/op/vectorstore.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp create mode 100644 inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp create mode 100644 inference-engine/src/snippets/include/snippets/register_info.hpp create mode 100644 inference-engine/src/snippets/include/snippets/snippets_isa.hpp create mode 100644 inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp create mode 100644 inference-engine/src/snippets/src/generator.cpp create mode 100644 inference-engine/src/snippets/src/itt.hpp create mode 100644 inference-engine/src/snippets/src/op/blockedload.cpp create mode 100644 inference-engine/src/snippets/src/op/blockedparameter.cpp create mode 100644 
inference-engine/src/snippets/src/op/broadcastload.cpp create mode 100644 inference-engine/src/snippets/src/op/broadcastmove.cpp create mode 100644 inference-engine/src/snippets/src/op/load.cpp create mode 100644 inference-engine/src/snippets/src/op/nop.cpp create mode 100644 inference-engine/src/snippets/src/op/scalar.cpp create mode 100644 inference-engine/src/snippets/src/op/scalarload.cpp create mode 100644 inference-engine/src/snippets/src/op/scalarstore.cpp create mode 100644 inference-engine/src/snippets/src/op/staticpower.cpp create mode 100644 inference-engine/src/snippets/src/op/store.cpp create mode 100644 inference-engine/src/snippets/src/op/subgraph.cpp create mode 100644 inference-engine/src/snippets/src/op/vectorload.cpp create mode 100644 inference-engine/src/snippets/src/op/vectorstore.cpp create mode 100644 inference-engine/src/snippets/src/pass/assign_registers.cpp create mode 100644 inference-engine/src/snippets/src/pass/collapse_subgraph.cpp create mode 100644 inference-engine/src/snippets/src/pass/insert_load_store.cpp create mode 100644 inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp create mode 100644 inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp create mode 100644 inference-engine/src/snippets/src/pass/vector_to_scalar.cpp create mode 100644 inference-engine/src/snippets/src/register_info.cpp create mode 100644 inference-engine/src/snippets/src/remarks.hpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/registers.cpp create mode 100644 inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp create mode 100644 
inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt index 63d85a820c2..10bfb7e5875 100644 --- a/inference-engine/src/CMakeLists.txt +++ b/inference-engine/src/CMakeLists.txt @@ -40,13 +40,15 @@ add_subdirectory(low_precision_transformations) add_subdirectory(offline_transformations) +add_subdirectory(snippets) + # add a custom target to build all Inference Engine Core libraries add_custom_target(ie_libraries ALL DEPENDS inference_engine_transformations inference_engine_legacy inference_engine inference_engine_preproc inference_engine_ir_v7_reader inference_engine_ir_reader - inference_engine_lp_transformations) + inference_engine_lp_transformations inference_engine_snippets) if(NGRAPH_ONNX_IMPORT_ENABLE) add_dependencies(ie_libraries inference_engine_onnx_reader) diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index 1ea32276311..cfad762a626 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -171,7 +171,9 @@ if(WIN32) set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s) endif() -target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} +target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt openvino::conditional_compilation + ${CMAKE_DL_LIBS} ${NGRAPH_LIBRARIES} + inference_engine_snippets inference_engine_transformations pugixml) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt index 66498fdbd49..09bcf94c273 100644 --- a/inference-engine/src/legacy_api/CMakeLists.txt +++ b/inference-engine/src/legacy_api/CMakeLists.txt @@ -42,6 +42,7 @@ target_include_directories(${TARGET_NAME}_obj PRIVATE 
${PUBLIC_HEADERS_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src ${IE_MAIN_SOURCE_DIR}/src/inference_engine # For CNNNetworkNGraphImpl + $ $ $ $ @@ -53,7 +54,7 @@ add_cpplint_target(${TARGET_NAME}_obj_cpplint FOR_TARGETS ${TARGET_NAME}_obj) # Create shared library -add_library(${TARGET_NAME} SHARED +add_library(${TARGET_NAME} SHARED ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp $) @@ -62,7 +63,7 @@ ie_add_vs_version_file(NAME ${TARGET_NAME} set_ie_threading_interface_for(${TARGET_NAME}) -target_link_libraries(${TARGET_NAME} PUBLIC inference_engine +target_link_libraries(${TARGET_NAME} PUBLIC inference_engine inference_engine_snippets PRIVATE pugixml openvino::itt ${NGRAPH_LIBRARIES} inference_engine_transformations) diff --git a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp index 7b5ff7ae2fa..2ea30ad0631 100644 --- a/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp +++ b/inference-engine/src/legacy_api/src/convert_function_to_cnn_network.cpp @@ -39,6 +39,7 @@ #include "legacy/ngraph_ops/rnn_sequence_ie.hpp" #include "legacy/ngraph_ops/lstm_sequence_ie.hpp" #include "legacy/ngraph_ops/gru_sequence_ie.hpp" +#include "snippets/op/subgraph.hpp" #include "exec_graph_info.hpp" #include "caseless.hpp" @@ -1978,6 +1979,15 @@ void convertFunctionToICNNNetwork(const std::shared_ptrparams[ExecGraphInfoSerialization::ORIGINAL_NAMES] = originalNames; } + if (auto subgraph = ::ngraph::as_type_ptr(layer)) { + std::string names = ""; + for (const auto& op : subgraph->get_body()->get_ordered_ops()) { + names += ", " + op->get_friendly_name(); + } + + cnnLayer->params["originalLayersNames"] += names; + } + std::string primitivesPriority = ::ngraph::getPrimitivesPriority(layer); if (!primitivesPriority.empty()) { cnnLayer->params["PrimitivesPriority"] = primitivesPriority; diff --git a/inference-engine/src/snippets/CMakeLists.txt b/inference-engine/src/snippets/CMakeLists.txt new 
file mode 100644 index 00000000000..09bbe10a38b --- /dev/null +++ b/inference-engine/src/snippets/CMakeLists.txt @@ -0,0 +1,56 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set (TARGET_NAME "inference_engine_snippets") + +set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") + +file(GLOB_RECURSE LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp) +file(GLOB_RECURSE PUBLIC_HEADERS ${PUBLIC_HEADERS_DIR}/snippets/*.hpp) + +# Create named folders for the sources within the .vcproj +# Empty name lists them directly under the .vcproj + +source_group("src" FILES ${LIBRARY_SRC}) +source_group("include" FILES ${PUBLIC_HEADERS}) + +# Create shared library + +add_library(${TARGET_NAME} SHARED + ${LIBRARY_SRC} + ${PUBLIC_HEADERS}) + +ie_faster_build(${TARGET_NAME} + UNITY +) + +ie_add_vs_version_file(NAME ${TARGET_NAME} + FILEDESCRIPTION "Inference Engine Snippets transformations library") + +target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS) + +target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations ${NGRAPH_LIBRARIES} + PRIVATE ${NGRAPH_REF_LIBRARIES} openvino::conditional_compilation) + +target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR} + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) + +add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) + +ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + +# LTO + +set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) + +# developer package + +ie_developer_export_targets(${TARGET_NAME}) + +# install + +install(TARGETS ${TARGET_NAME} + RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core + ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT core + LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core) diff --git a/inference-engine/src/snippets/include/snippets/generator.hpp 
b/inference-engine/src/snippets/include/snippets/generator.hpp new file mode 100644 index 00000000000..1c45f1e5167 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/generator.hpp @@ -0,0 +1,123 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief A file contains public interface for target indepenent code generator. + * @file generator.hpp + */ +#pragma once + +#include +#include "snippets_isa.hpp" + +namespace ngraph { +namespace snippets { + +using code = const uint8_t *; +using RegInfo = std::pair, std::vector>; + +TRANSFORMATIONS_API auto getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo; + +/** + * @interface Emitter + * @brief Base class for all target specific code emitters used by generator. + * @ingroup snippets + */ +class TRANSFORMATIONS_API Emitter { +public: + /** + * @brief Default constructor + */ + Emitter(const std::shared_ptr& n) { + } + + /** + * @brief called by generator to generate code to produce target code for a specific operation + * @param in vector of vector argument registers + * @param out vector of vector resulting registers + * @param pool optional vector of free vector registers which might be used inside method + * @param gpr vector of free generam puproce registers which might be used inside method + * @return void + */ + virtual void emit_code(const std::vector& in, + const std::vector& out, + const std::vector& pool = {}, + const std::vector& gpr = {}) const = 0; + + /** + * @brief called by generator to generate data section, if needed for a specific operation + * @return void + */ + virtual void emit_data() const { + } +}; + +/** + * @interface TargetMachine + * @brief Base class Target machine representation. 
Target derives from this class to provide generator information about supported emittors + * @ingroup snippets + */ +class TRANSFORMATIONS_API TargetMachine { +public: + /** + * @brief called by generator to all the emittors available for a target machine + * @return a map by node's type info with callbacks to create an instance of emmitter for corresponding operation type + */ + virtual auto getJitters() -> std::map(std::shared_ptr)>>{ + return {}; + } +}; + +/** + * @interface Schedule + * @brief Return scheduling information and pointer to generated kernel code + * @ingroup snippets + */ +class TRANSFORMATIONS_API Schedule { +public: + /** + * @brief Default constructor + */ + Schedule() : work_size({}), is_flat(false), ptr(nullptr) {} + /** + * @brief Default to create schedule out of specific parameters + * @param ws work size for kernel execution + * @param f can this kernel be linearided to 1D range + * @param p pointer to generated code + */ + Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {} + + Shape work_size {}; + bool is_flat {false}; + code ptr {nullptr}; +}; + +/** + * @interface Generator + * @brief Target independent code generator interface + * @ingroup snippets + */ +class TRANSFORMATIONS_API Generator { +public: + /** + * @brief Default constructor + */ + Generator() = default; + /** + * @brief Default destructor + */ + virtual ~Generator() = default; + /** + * @brief virtual method any specific implementation should implement + * @param f runction in canonical for for table-based code generation + * @return pointer to generated code + */ + virtual code generate(std::shared_ptr& f) const = 0; + +protected: + mutable std::map(std::shared_ptr)>> jitters; +}; + +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/blockedload.hpp b/inference-engine/src/snippets/include/snippets/op/blockedload.hpp new file mode 100644 index 
00000000000..fe3a1d86cb8 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/blockedload.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "load.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface BlockedLoad + * @brief Generated by Canonicalization step for blocked data (NCHWc) to be loaded + * @ingroup snippets + */ +class TRANSFORMATIONS_API BlockedLoad : public Load { +public: + NGRAPH_RTTI_DECLARATION; + + BlockedLoad(const Output& x); + BlockedLoad() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp b/inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp new file mode 100644 index 00000000000..fade0611e40 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/blockedparameter.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface BlockedParameter + * @brief Represents blocked input (NCHWc) for a subgraph + * @ingroup snippets + */ +class TRANSFORMATIONS_API BlockedParameter : public ngraph::op::Parameter { +public: + NGRAPH_RTTI_DECLARATION; + + BlockedParameter() = default; + BlockedParameter(const ngraph::element::Type& element_type, const PartialShape& pshape) + : Parameter(element_type, pshape) { + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(m_element_type, 
m_partial_shape); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/broadcastload.hpp b/inference-engine/src/snippets/include/snippets/op/broadcastload.hpp new file mode 100644 index 00000000000..d174eb902fe --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/broadcastload.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface BroadcastLoad + * @brief Is generated for broadcasting by least varying dimension for non-blocked cases and the second varying dimension for blocked + * @ingroup snippets + */ +class TRANSFORMATIONS_API BroadcastLoad : public BroadcastMove { +public: + NGRAPH_RTTI_DECLARATION; + + BroadcastLoad(const Output& x, Shape output_shape); + BroadcastLoad() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + void set_broadcast_info(const Shape& bct) { + broadcast_info = bct; + } + + bool is_broadcast(size_t idx) { + return broadcast_info[idx] == 1; + } + +private: + Shape broadcast_info; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp b/inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp new file mode 100644 index 00000000000..4ddb652faaa --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/broadcastmove.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace 
snippets { +namespace op { + +/** + * @interface BroadcastMove + * @brief Added to a subgraph if explicit broadcast instruction should be generated + * @ingroup snippets + */ +class TRANSFORMATIONS_API BroadcastMove : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + BroadcastMove(const Output& x, Shape output_shape); + BroadcastMove() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + +protected: + Shape output_shape; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/load.hpp b/inference-engine/src/snippets/include/snippets/op/load.hpp new file mode 100644 index 00000000000..557d9dd078d --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/load.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Load + * @brief Generated by Canonicalization step where explicit load instruction should be emmiteed + * ScalarLoad == scalar instruction + post increment + * Load (VectorLoad) == vector instruction + post increment + * BroadcastLoad == scalar instruction - post increment + * BlockedLoad == vector instruction - post increment + * @ingroup snippets + */ +class TRANSFORMATIONS_API Load : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Load(const Output& x); + Load() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + bool 
evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/nop.hpp b/inference-engine/src/snippets/include/snippets/op/nop.hpp new file mode 100644 index 00000000000..8fc731d04dd --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/nop.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Nop + * @brief Generated by Canonicalization and represents not-an-operation + * @ingroup snippets + */ +class TRANSFORMATIONS_API Nop : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Nop(const OutputVector& arguments, const OutputVector& results); + Nop() = default; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/scalar.hpp b/inference-engine/src/snippets/include/snippets/op/scalar.hpp new file mode 100644 index 00000000000..bb2aad43d2c --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/scalar.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "ngraph/op/op.hpp" +#include "ngraph/op/constant.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Scalar + * @brief Generated by Canonicalization for a scalar constant Shape() == {1} + * @ingroup snippets + */ +class TRANSFORMATIONS_API Scalar : public ngraph::op::Constant { +public: + NGRAPH_RTTI_DECLARATION; + + Scalar() = default; + Scalar(const std::shared_ptr& tensor) : Constant(tensor) {} + template + Scalar(const element::Type& type, Shape 
shape, const std::vector& values) : Constant(type, shape, values) {} + Scalar(const element::Type& type, const Shape& shape) : Constant(type, shape) {} + template ::value>::type> + Scalar(const element::Type& type, Shape shape, T value) : Constant(type, shape, value) {} + Scalar(const element::Type& type, Shape shape, const std::vector& values) : Constant(type, shape, values) {} + Scalar(const element::Type& type, const Shape& shape, const void* data) : Constant(type, shape, data) {} + + Scalar(const Constant& other) : Constant(other) {} + Scalar(const Scalar& other) : Constant(other) {} + Scalar& operator=(const Scalar&) = delete; + ~Scalar() override {} + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(*this); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/scalarload.hpp b/inference-engine/src/snippets/include/snippets/op/scalarload.hpp new file mode 100644 index 00000000000..6553bc78e0a --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/scalarload.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "load.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface ScalarLoad + * @brief Generated by Canonicalization for a scalar value load to vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API ScalarLoad : public Load { +public: + NGRAPH_RTTI_DECLARATION; + + ScalarLoad(const Output& x); + ScalarLoad() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end 
of file diff --git a/inference-engine/src/snippets/include/snippets/op/scalarstore.hpp b/inference-engine/src/snippets/include/snippets/op/scalarstore.hpp new file mode 100644 index 00000000000..190f53d2d5f --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/scalarstore.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "store.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface ScalarStore + * @brief Generated by Canonicalization for a scalar value store from vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API ScalarStore : public Store { +public: + NGRAPH_RTTI_DECLARATION; + + ScalarStore(const Output& x); + ScalarStore() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/staticpower.hpp b/inference-engine/src/snippets/include/snippets/op/staticpower.hpp new file mode 100644 index 00000000000..42128f761b8 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/staticpower.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface PowerStatic + * @brief Generated by Canonicalization for a spasical case of power innstruction which has constant power value + * @ingroup snippets + */ +class TRANSFORMATIONS_API PowerStatic : public ngraph::op::v1::Power { +public: + NGRAPH_RTTI_DECLARATION; + + PowerStatic() : Power() { + } + + PowerStatic(const Output& arg0, + const Output& arg1, + const 
ngraph::op::AutoBroadcastSpec& auto_broadcast = + ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY)) : Power(arg0, arg1, auto_broadcast) { + NGRAPH_CHECK(!!std::dynamic_pointer_cast(arg1.get_node_shared_ptr()), "second argument must be scalar constant."); + } + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0), new_args.at(1), this->get_autob()); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/store.hpp b/inference-engine/src/snippets/include/snippets/op/store.hpp new file mode 100644 index 00000000000..4bf883f39e1 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/store.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Load + * @brief Generated by Canonicalization step where explicit store instruction should be emmiteed + * @ingroup snippets + */ +class TRANSFORMATIONS_API Store : public ngraph::op::Op { +public: + NGRAPH_RTTI_DECLARATION; + + Store(const Output& x); + Store() = default; + + bool visit_attributes(AttributeVisitor& visitor) override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + void validate_and_infer_types() override; + + bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/subgraph.hpp b/inference-engine/src/snippets/include/snippets/op/subgraph.hpp new file mode 100644 index 00000000000..9d5a3a8e263 --- /dev/null +++ 
b/inference-engine/src/snippets/include/snippets/op/subgraph.hpp @@ -0,0 +1,101 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include +#include + +#include "snippets/generator.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface Subgraph + * @brief An operation that is implemented by a function + * @ingroup snippets + */ +class TRANSFORMATIONS_API Subgraph : public ngraph::op::Op { +public: + using BlockedShape = std::tuple; + using BlockedShapeVector = std::vector; + + NGRAPH_RTTI_DECLARATION; + + Subgraph(const OutputVector& args, std::shared_ptr body); + + Subgraph(const NodeVector& args, std::shared_ptr body); + + bool visit_attributes(AttributeVisitor& visitor) override; + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; + + std::shared_ptr get_body() const { + return m_body; + } + + std::shared_ptr get_generator() const { + return m_generator; + } + + std::shared_ptr make_canonical_from_this(); + + snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); + bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override; + + /// Set a new body for the op; body needs to satisfy requirements on inputs/outputs + void set_body(std::shared_ptr body); + + // plugin sets generator for a snippet to some specific generator. 
+ // it's going to be replaced with Jitters table later + void set_generator(std::shared_ptr generator); + + void print() const; + void print_statistics(bool verbose); + + static auto wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr; + +private: + void canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); + void convert_to_snippet_dialect(); + + std::shared_ptr m_body; + std::shared_ptr m_generator; +}; + +static inline std::ostream& operator<<(std::ostream& os, const op::Subgraph::BlockedShape& blocked_shape) { + os << std::get<0>(blocked_shape) << " " << std::get<1>(blocked_shape) << " " << std::get<2>(blocked_shape); + return os; +} + +static inline auto is_scalar_constant(const std::shared_ptr& source_output_node) -> bool { + return !!ngraph::as_type_ptr(source_output_node) && + (source_output_node->get_shape() == ngraph::Shape() || ngraph::shape_size(source_output_node->get_shape()) == 1); +}; + +static inline auto create_body(std::string name, const ngraph::ResultVector& results, const ngraph::ParameterVector& parameters) -> + std::shared_ptr { + auto body = std::make_shared(results, parameters, name); + return body; +}; + +static inline auto build_subgraph(const std::shared_ptr& node, const ngraph::OutputVector& inputs, const std::shared_ptr& body) + -> std::shared_ptr{ + auto subgraph = std::make_shared(inputs, body); + copy_runtime_info(node, subgraph); + subgraph->set_friendly_name(node->get_friendly_name()); + return subgraph; +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/op/vectorload.hpp b/inference-engine/src/snippets/include/snippets/op/vectorload.hpp new file mode 100644 index 00000000000..11c77150f9b --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/vectorload.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + 
+#include + +#include +#include "load.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface VectorLoad + * @brief Generated by Canonicalization for a vector value load to vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API VectorLoad : public Load { +public: + NGRAPH_RTTI_DECLARATION; + + VectorLoad(const Output& x); + VectorLoad() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/op/vectorstore.hpp b/inference-engine/src/snippets/include/snippets/op/vectorstore.hpp new file mode 100644 index 00000000000..307c2e68236 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/op/vectorstore.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "store.hpp" + +namespace ngraph { +namespace snippets { +namespace op { + +/** + * @interface VectorStore + * @brief Generated by Canonicalization for a vector value store from vector register + * @ingroup snippets + */ +class TRANSFORMATIONS_API VectorStore : public Store { +public: + NGRAPH_RTTI_DECLARATION; + + VectorStore(const Output& x); + VectorStore() = default; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override { + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); + } +}; + +} // namespace op +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp b/inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp new file mode 100644 index 00000000000..d94cfde48bd --- /dev/null +++ 
b/inference-engine/src/snippets/include/snippets/pass/assign_registers.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface AssignRegisters + * @brief Assigns internal `vector` register indexes to operations. + * Changing order of variables or dataflow leads to invalidation of register assignment. + * @ingroup snippets + */ +class TRANSFORMATIONS_API AssignRegisters : public ngraph::pass::FunctionPass { +public: + AssignRegisters() : FunctionPass() { + set_property(ngraph::pass::PassProperty::REQUIRE_STATIC_SHAPE, true); + } + bool run_on_function(std::shared_ptr function) override; +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp b/inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp new file mode 100644 index 00000000000..1a77699d020 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/collapse_subgraph.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include + + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface StartSubgraph + * @brief Matches multiple output layout-oblivious operations to start a new subgraph + * @ingroup snippets + */ +class TRANSFORMATIONS_API StartSubgraph: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + explicit StartSubgraph(bool tokenize_by_node = false); +}; + +/** + * @interface AttachToSubgraph + * @brief Matches layout-oblivious operations with subgraph operation as an input to attach this node into it + * @ingroup snippets + */ +class TRANSFORMATIONS_API AttachToSubgraph: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + 
explicit AttachToSubgraph(bool tokenize_by_node = false); +}; + +/** + * @interface TokenizeSnippets + * @brief Splits function to subgraphs if possible using rules above + * This pass tokenizes topology graph into subgraphs. + * Those subgraphs consist of unary or binary layout-oblivious (LO) operations found in subset 1. + * Non-layout-oblivious (NLO) operations (also called support in this context) are ignored and become a full stop in tokenization routine + * 1. if a considered LO operation doesn't have any input subgraphs + * -> a new single-op subgraph is introduced + * 1. if a considered LO operation is a binary or a unary operation with at least one subgraph as an input + * -> 1. all inputs from the input subgraphs are collected together + * 1. non-subgraph inputs are wrapped into parameters + * 1. all input bodies are merged and + * 1. this new operation is added to a body of input subgraph + * 1. outputs are collected subgraph (outputs consumed by some other node & subgraph outputs consumed by the node to be merged) + * 1. finally current node is replaced with the new subgraph. 
We cannot use replace_node because multiple nodes are replaced so + * make the replacement manually by redirecting ports + * Input subgraph is prevented from visiting twice if more than one output of it is consumed by currently considered node + * New subgraph is introduced, if there is a loop introduced + * New subgraph is introduced, if number of inputs and outputs exceeds 7 due to scheduling limitation + * New subgraph is introduced, if multiple outputs of merged nodes are not broadcastable to each other (equality of all outputs is too much on the other hand) + * Scalar constants are placed as is into subgraph due to optimization purpose + * @ingroup snippets + */ +class TRANSFORMATIONS_API TokenizeSnippets: public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + TokenizeSnippets(bool tokenize_by_node = false) { + add_matcher(tokenize_by_node); + add_matcher(tokenize_by_node); + } +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp b/inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp new file mode 100644 index 00000000000..797710dae02 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/insert_load_store.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface InsertLoad + * @brief Inserts explicit load instruction after each parameter. + * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API InsertLoad: public ngraph::pass::MatcherPass { +public: + InsertLoad(); +}; + +/** + * @interface InsertStore + * @brief Inserts explicit store instruction before each result. 
+ * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API InsertStore: public ngraph::pass::MatcherPass { +public: + InsertStore(); +}; + + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp b/inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp new file mode 100644 index 00000000000..05e47374993 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/insert_movebroadcast.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface InsertMoveBroadcast + * @brief Inserts explicit MoveBroadcast instruction if broadcasting by most varying dimension is needed. + * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API InsertMoveBroadcast: public ngraph::pass::MatcherPass { +public: + InsertMoveBroadcast(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp b/inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp new file mode 100644 index 00000000000..3af81c424b2 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/load_movebroadcast_to_broadcastload.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface LoadMoveBroadcastToBroadcastLoad + * @brief Fuses consecutive Load and MoveBroadcast into a single load 
instruction. + * The pass is used to convert function to a canonical form for code generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API LoadMoveBroadcastToBroadcastLoad: public ngraph::pass::MatcherPass { +public: + LoadMoveBroadcastToBroadcastLoad(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp b/inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp new file mode 100644 index 00000000000..e01b240ae87 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/pass/vector_to_scalar.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @interface ReplaceLoadsWithScalarLoads + * @brief Replaces vector loads with scalar versions. + * The pass is used to change element type of function in a canonical form from vector to scalar. + * Used for tail generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API ReplaceLoadsWithScalarLoads: public ngraph::pass::MatcherPass { +public: + ReplaceLoadsWithScalarLoads(); +}; + +/** + * @interface ReplaceStoresWithScalarStores + * @brief Replaces vector stores with scalar versions. + * The pass is used to change element type of function in a canonical form from vector to scalar. 
+ * Used for tail generation + * @ingroup snippets + */ +class TRANSFORMATIONS_API ReplaceStoresWithScalarStores: public ngraph::pass::MatcherPass { +public: + ReplaceStoresWithScalarStores(); +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/register_info.hpp b/inference-engine/src/snippets/include/snippets/register_info.hpp new file mode 100644 index 00000000000..dbe914552ed --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/register_info.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace ngraph { + +template <> +class TRANSFORMATIONS_API VariantWrapper> : public VariantImpl> { +public: + static constexpr VariantTypeInfo type_info{"Variant::RegInfo|Variant::RuntimeAttribute::AxisVector", 0}; + + const VariantTypeInfo& get_type_info() const override { return type_info; } + VariantWrapper(const value_type& value) + : VariantImpl(value) { + } +}; + +} // namespace ngraph \ No newline at end of file diff --git a/inference-engine/src/snippets/include/snippets/snippets_isa.hpp b/inference-engine/src/snippets/include/snippets/snippets_isa.hpp new file mode 100644 index 00000000000..f078d0570f3 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/snippets_isa.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/ops.hpp" +#include + +#include "op/blockedload.hpp" +#include "op/blockedparameter.hpp" +#include "op/broadcastload.hpp" +#include "op/broadcastmove.hpp" +#include "op/load.hpp" +#include "op/nop.hpp" +#include "op/scalar.hpp" +#include "op/scalarload.hpp" +#include "op/scalarstore.hpp" +#include "op/staticpower.hpp" +#include "op/store.hpp" +#include "op/vectorload.hpp" +#include "op/vectorstore.hpp" + +namespace ngraph { +namespace snippets { 
+namespace isa { +#define NGRAPH_OP(a, b) using b::a; +#include "snippets_isa_tbl.hpp" +#undef NGRAPH_OP +} // namespace isa +} // namespace snippets +} // namespace ngraph diff --git a/inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp b/inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp new file mode 100644 index 00000000000..ecaf6e8dde1 --- /dev/null +++ b/inference-engine/src/snippets/include/snippets/snippets_isa_tbl.hpp @@ -0,0 +1,84 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#ifndef NGRAPH_OP +#warning "NGRAPH_OP not defined" +#define NGRAPH_OP(x, y) +#endif + +// SnippetS dialect +NGRAPH_OP(Load, ngraph::snippets::op) +NGRAPH_OP(ScalarLoad, ngraph::snippets::op) +NGRAPH_OP(VectorLoad, ngraph::snippets::op) +NGRAPH_OP(BlockedLoad, ngraph::snippets::op) +NGRAPH_OP(BroadcastLoad, ngraph::snippets::op) + +NGRAPH_OP(Store, ngraph::snippets::op) +NGRAPH_OP(ScalarStore, ngraph::snippets::op) +NGRAPH_OP(VectorStore, ngraph::snippets::op) + +NGRAPH_OP(BroadcastMove, ngraph::snippets::op) +NGRAPH_OP(Scalar, ngraph::snippets::op) +NGRAPH_OP(Nop, ngraph::snippets::op) + +// Layout-oblivious from opset1 + +// opset completeness +NGRAPH_OP(Constant, ngraph::op) +NGRAPH_OP(Parameter, ngraph::op::v0) +NGRAPH_OP(BlockedParameter, ngraph::snippets::op) +NGRAPH_OP(Result, ngraph::op::v0) +NGRAPH_OP(Broadcast, ngraph::op::v1) + +// unary +NGRAPH_OP(Abs, ngraph::op::v0) +NGRAPH_OP(Acos, ngraph::op::v0) +NGRAPH_OP(Asin, ngraph::op::v0) +NGRAPH_OP(Atan, ngraph::op::v0) +NGRAPH_OP(Ceiling, ngraph::op::v0) +NGRAPH_OP(Clamp, ngraph::op::v0) +NGRAPH_OP(Cos, ngraph::op::v0) +NGRAPH_OP(Cosh, ngraph::op::v0) +NGRAPH_OP(Elu, ngraph::op::v0) +NGRAPH_OP(Erf, ngraph::op::v0) +NGRAPH_OP(Exp, ngraph::op::v0) +NGRAPH_OP(Floor, ngraph::op::v0) +NGRAPH_OP(HardSigmoid, ngraph::op::v0) +NGRAPH_OP(Log, ngraph::op::v0) +NGRAPH_OP(LogicalNot, ngraph::op::v1) +NGRAPH_OP(Negative, ngraph::op::v0) 
+NGRAPH_OP(Relu, ngraph::op::v0) +NGRAPH_OP(Selu, ngraph::op::v0) +NGRAPH_OP(Sign, ngraph::op::v0) +NGRAPH_OP(Sigmoid, ngraph::op::v0) +NGRAPH_OP(Sin, ngraph::op::v0) +NGRAPH_OP(Sinh, ngraph::op::v0) +NGRAPH_OP(Sqrt, ngraph::op::v0) +NGRAPH_OP(Tan, ngraph::op::v0) +NGRAPH_OP(Tanh, ngraph::op::v0) + +// binary +NGRAPH_OP(Add, ngraph::op::v1) +NGRAPH_OP(Divide, ngraph::op::v1) +NGRAPH_OP(Equal, ngraph::op::v1) +NGRAPH_OP(FloorMod, ngraph::op::v1) +NGRAPH_OP(Greater, ngraph::op::v1) +NGRAPH_OP(GreaterEqual, ngraph::op::v1) +NGRAPH_OP(Less, ngraph::op::v1) +NGRAPH_OP(LessEqual, ngraph::op::v1) +NGRAPH_OP(LogicalAnd, ngraph::op::v1) +NGRAPH_OP(LogicalOr, ngraph::op::v1) +NGRAPH_OP(LogicalXor, ngraph::op::v1) +NGRAPH_OP(Maximum, ngraph::op::v1) +NGRAPH_OP(Minimum, ngraph::op::v1) +NGRAPH_OP(Mod, ngraph::op::v1) +NGRAPH_OP(Multiply, ngraph::op::v1) +NGRAPH_OP(NotEqual, ngraph::op::v1) +NGRAPH_OP(Power, ngraph::op::v1) +NGRAPH_OP(PRelu, ngraph::op::v0) +NGRAPH_OP(SquaredDifference, ngraph::op::v0) +NGRAPH_OP(Subtract, ngraph::op::v1) +NGRAPH_OP(Xor, ngraph::op::v0) diff --git a/inference-engine/src/snippets/src/generator.cpp b/inference-engine/src/snippets/src/generator.cpp new file mode 100644 index 00000000000..b037bea242e --- /dev/null +++ b/inference-engine/src/snippets/src/generator.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/generator.hpp" +#include "snippets/register_info.hpp" + +auto ngraph::snippets::getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo { + auto rt = n->get_rt_info(); + + std::vector rout; + if (auto rinfo = rt["reginfo"]) { + auto reginfo = ngraph::as_type_ptr>>(rinfo)->get(); + for (auto reg : reginfo) { + rout.push_back(reg); + } + } + + std::vector rin; + for (auto input : n->inputs()) { + auto rt = input.get_source_output().get_node_shared_ptr()->get_rt_info(); + if (auto rinfo = rt["reginfo"]) { + auto reginfo = ngraph::as_type_ptr>>(rinfo)->get(); + 
for (auto reg : reginfo) { + rin.push_back(reg); + } + } + } + return std::make_pair(rin, rout); +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/itt.hpp b/inference-engine/src/snippets/src/itt.hpp new file mode 100644 index 00000000000..d96715fcb2b --- /dev/null +++ b/inference-engine/src/snippets/src/itt.hpp @@ -0,0 +1,71 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** + +/** + * @brief Defines openvino domains for tracing + * @file itt.hpp + */ + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { +namespace itt { +namespace domains { + OV_ITT_DOMAIN(IETransform); +} // namespace domains +} // namespace itt +} // namespace pass +} // namespace ngraph + +OV_CC_DOMAINS(ngraph_pass); +OV_CC_DOMAINS(internal_op); + +/* + * RUN_ON_FUNCTION_SCOPE macro allows to disable the run_on_function pass + * MATCHER_SCOPE macro allows to disable the MatcherPass if matcher isn't applied + * INTERNAL_OP_SCOPE macro allows to disable parts of internal nGraph operations if they are not used + */ +#if defined(SELECTIVE_BUILD_ANALYZER) +#define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ngraph_pass, OV_PP_CAT(region, _run_on_function)) +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)) + +#define INTERNAL_OP_SCOPE(region) OV_SCOPE(internal_op, region) + +#elif defined(SELECTIVE_BUILD) + +#define MATCHER_SCOPE_(scope, region) \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \ + throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + \ + " is disabled!") + +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)); \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ngraph_pass, _, region)) == 0) \ + return +#define INTERNAL_OP_SCOPE(region) MATCHER_SCOPE_(internal_op, region) +#define RUN_ON_FUNCTION_SCOPE(region) MATCHER_SCOPE_(ngraph_pass, OV_PP_CAT(region, _run_on_function)) + +#else +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)) +#define INTERNAL_OP_SCOPE(region) +#define RUN_ON_FUNCTION_SCOPE(region) +#endif diff --git a/inference-engine/src/snippets/src/op/blockedload.cpp b/inference-engine/src/snippets/src/op/blockedload.cpp new file mode 100644 index 00000000000..f5dcbff8e4c --- /dev/null +++ 
b/inference-engine/src/snippets/src/op/blockedload.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/blockedload.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BlockedLoad, "BlockedLoad", 0); + +snippets::op::BlockedLoad::BlockedLoad(const Output& x) : Load(x) { +} diff --git a/inference-engine/src/snippets/src/op/blockedparameter.cpp b/inference-engine/src/snippets/src/op/blockedparameter.cpp new file mode 100644 index 00000000000..85d66b3edba --- /dev/null +++ b/inference-engine/src/snippets/src/op/blockedparameter.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/blockedparameter.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BlockedParameter, "BlockedParameter", 0); diff --git a/inference-engine/src/snippets/src/op/broadcastload.cpp b/inference-engine/src/snippets/src/op/broadcastload.cpp new file mode 100644 index 00000000000..d4f9372a2cf --- /dev/null +++ b/inference-engine/src/snippets/src/op/broadcastload.cpp @@ -0,0 +1,35 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/broadcastload.hpp" + +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BroadcastLoad, "BroadcastLoad", 0); + +snippets::op::BroadcastLoad::BroadcastLoad(const Output& x, Shape shape) +: BroadcastMove(x, shape), broadcast_info(x.get_shape().size(), 0) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::BroadcastLoad::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::BroadcastLoad::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(BroadcastLoad); + check_new_args_count(this, new_args); + auto other = std::make_shared(new_args.at(0), output_shape); + 
other->set_broadcast_info(this->broadcast_info); + return other; +} + +void snippets::op::BroadcastLoad::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), output_shape); +} diff --git a/inference-engine/src/snippets/src/op/broadcastmove.cpp b/inference-engine/src/snippets/src/op/broadcastmove.cpp new file mode 100644 index 00000000000..c0e080de87d --- /dev/null +++ b/inference-engine/src/snippets/src/op/broadcastmove.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/broadcastmove.hpp" + +#include +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::BroadcastMove, "BroadcastMove", 0); + +snippets::op::BroadcastMove::BroadcastMove(const Output& x, Shape shape) : Op({x}), output_shape(shape) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::BroadcastMove::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::BroadcastMove::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(BroadcastMove); + check_new_args_count(this, new_args); + auto other = std::make_shared(new_args.at(0), this->output_shape); + return other; +} + +void snippets::op::BroadcastMove::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), this->output_shape); +} + +bool snippets::op::BroadcastMove::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const { + INTERNAL_OP_SCOPE(BroadcastMove); + NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config"); + NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config"); + NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation"); + NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same 
shape as output port"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + + auto ishape = input_values[0]->get_shape(); + auto oshape = output_values[0]->get_shape(); + + NGRAPH_CHECK(ishape.size() == oshape.size(), "input and output should have the same rank"); + + AxisSet broadcast_axes; + for (size_t k = 0; k < ishape.size(); k++) { + if (!((ishape[k] == oshape[k]) + || (ishape[k] != oshape[k] && ((ishape[k] == 1) != (oshape[k] == 1) ) ))) { + throw ngraph_error("FakeBroadcast::evaluate incompatible shapes"); + } + + if (ishape[k] != oshape[k]) { + broadcast_axes.insert(k); + } + } + + runtime::reference::broadcast(input_values[0]->get_data_ptr(), + output_values[0]->get_data_ptr(), + input_values[0]->get_shape(), + output_values[0]->get_shape(), + broadcast_axes, + sizeof(float)); + return true; +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/op/load.cpp b/inference-engine/src/snippets/src/op/load.cpp new file mode 100644 index 00000000000..abad20e1dfc --- /dev/null +++ b/inference-engine/src/snippets/src/op/load.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/load.hpp" + +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Load, "Load", 0); + +snippets::op::Load::Load(const Output& x) : Op({x}) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::Load::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::Load::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(Load); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); +} + +void snippets::op::Load::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); +} + +bool snippets::op::Load::evaluate(const 
HostTensorVector& output_values, const HostTensorVector& input_values) const { + INTERNAL_OP_SCOPE(Load); + NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config"); + NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config"); + NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation"); + NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + + std::copy(input_values[0]->get_data_ptr(), + input_values[0]->get_data_ptr() + shape_size(get_output_shape(0))*output_values[0]->get_element_type().size(), + output_values[0]->get_data_ptr()); + + return true; +} diff --git a/inference-engine/src/snippets/src/op/nop.cpp b/inference-engine/src/snippets/src/op/nop.cpp new file mode 100644 index 00000000000..73a5cebc124 --- /dev/null +++ b/inference-engine/src/snippets/src/op/nop.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/nop.hpp" + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Nop, "Nop", 0); + +snippets::op::Nop::Nop(const OutputVector& arguments, const OutputVector& results) : Op([arguments, results]() -> OutputVector { + OutputVector x; + x.insert(x.end(), arguments.begin(), arguments.end()); + x.insert(x.end(), results.begin(), results.end()); + return x; + }()) { +} diff --git a/inference-engine/src/snippets/src/op/scalar.cpp b/inference-engine/src/snippets/src/op/scalar.cpp new file mode 100644 index 00000000000..704be035eb4 --- /dev/null +++ b/inference-engine/src/snippets/src/op/scalar.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/scalar.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Scalar, "Scalar", 0); \ No newline at end of file diff --git a/inference-engine/src/snippets/src/op/scalarload.cpp b/inference-engine/src/snippets/src/op/scalarload.cpp new file mode 100644 index 00000000000..e3bff9123f8 --- /dev/null +++ b/inference-engine/src/snippets/src/op/scalarload.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/scalarload.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::ScalarLoad, "ScalarLoad", 0); + +snippets::op::ScalarLoad::ScalarLoad(const Output& x) : Load(x) { +} diff --git a/inference-engine/src/snippets/src/op/scalarstore.cpp b/inference-engine/src/snippets/src/op/scalarstore.cpp new file mode 100644 index 00000000000..991050fc016 --- /dev/null +++ b/inference-engine/src/snippets/src/op/scalarstore.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/scalarstore.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::ScalarStore, "ScalarStore", 0); + +snippets::op::ScalarStore::ScalarStore(const Output& x) : Store(x) { +} diff --git a/inference-engine/src/snippets/src/op/staticpower.cpp b/inference-engine/src/snippets/src/op/staticpower.cpp new file mode 100644 index 00000000000..38deb26ae5f --- /dev/null +++ b/inference-engine/src/snippets/src/op/staticpower.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/staticpower.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::PowerStatic, "PowerStatic", 0); diff --git a/inference-engine/src/snippets/src/op/store.cpp b/inference-engine/src/snippets/src/op/store.cpp new file mode 100644 index 00000000000..2055df1f395 --- /dev/null 
+++ b/inference-engine/src/snippets/src/op/store.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/op/scalarstore.hpp" + +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Store, "Store", 0); + +snippets::op::Store::Store(const Output& x) : Op({x}) { + constructor_validate_and_infer_types(); +} + +bool snippets::op::Store::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +std::shared_ptr snippets::op::Store::clone_with_new_inputs(const OutputVector& new_args) const { + INTERNAL_OP_SCOPE(Store); + check_new_args_count(this, new_args); + return std::make_shared(new_args.at(0)); +} + +void snippets::op::Store::validate_and_infer_types() { + set_output_type(0, get_input_element_type(0), get_input_partial_shape(0)); +} + +bool snippets::op::Store::evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const { + INTERNAL_OP_SCOPE(Store); + NGRAPH_CHECK(input_values.size() == this->inputs().size(), "wrong input config"); + NGRAPH_CHECK(output_values.size() == this->outputs().size(), "wrong output config"); + NGRAPH_CHECK(input_values.size() == output_values.size() && input_values.size() == 1, "must be 1->1 operation"); + NGRAPH_CHECK(this->output(0).get_shape() == output_values[0]->get_shape(), "output vector must have the same shape as output port"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + NGRAPH_CHECK(this->input(0).get_shape() == input_values[0]->get_shape(), "input and output must have same shape"); + + std::copy(input_values[0]->get_data_ptr(), + input_values[0]->get_data_ptr() + shape_size(get_output_shape(0))*output_values[0]->get_element_type().size(), + output_values[0]->get_data_ptr()); + + return true; +} diff --git a/inference-engine/src/snippets/src/op/subgraph.cpp 
b/inference-engine/src/snippets/src/op/subgraph.cpp new file mode 100644 index 00000000000..ff1d61916ef --- /dev/null +++ b/inference-engine/src/snippets/src/op/subgraph.cpp @@ -0,0 +1,344 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "remarks.hpp" + +#include "snippets/op/subgraph.hpp" +#include "snippets/pass/insert_load_store.hpp" +#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/pass/assign_registers.hpp" + +#include + +#include +#include +#include + +using namespace std; +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::Subgraph, "Subgraph", 0); + +void snippets::op::Subgraph::set_generator(std::shared_ptr generator) { + m_generator = generator; +} + +snippets::op::Subgraph::Subgraph(const OutputVector& args, std::shared_ptr body) + : Op(args), m_body(body), m_generator(nullptr) { + constructor_validate_and_infer_types(); +} + +snippets::op::Subgraph::Subgraph(const NodeVector& args, std::shared_ptr body) + : Subgraph(as_output_vector(args), body) {} + +std::shared_ptr snippets::op::Subgraph::clone_with_new_inputs(const OutputVector& inputs) const { + INTERNAL_OP_SCOPE(Subgraph); + return make_shared(inputs, ngraph::clone_function(*m_body.get())); +} + +void snippets::op::Subgraph::validate_and_infer_types() { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::ParameterVector old_parameters; + for (auto op : m_body->get_parameters()) { + old_parameters.push_back(op); + } + + for (size_t i = 0; i < get_input_size(); ++i) { + m_body->replace_parameter(i, std::make_shared(get_input_element_type(i), get_input_partial_shape(i))); + } + + m_body->validate_nodes_and_infer_types(); + + for (size_t i = 0; i < m_body->get_parameters().size(); i++) { + m_body->get_parameters()[i]->set_friendly_name(old_parameters[i]->get_friendly_name()); + } + + set_output_size(m_body->get_output_size()); + for (size_t 
i = 0; i < get_output_size(); ++i) { + set_output_type(i, m_body->get_output_element_type(i), m_body->get_output_partial_shape(i)); + } +} + +bool snippets::op::Subgraph::visit_attributes(AttributeVisitor& visitor) { + return true; +} + +auto snippets::op::Subgraph::wrap_node_as_subgraph(const std::shared_ptr& node) -> std::shared_ptr { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::ParameterVector body_parameters; + ngraph::OutputVector body_inputs; + + ngraph::OutputVector subgraph_inputs; + + for (auto input : node->inputs()) { + auto source_output = input.get_source_output(); + if (is_scalar_constant(source_output.get_node_shared_ptr())) { + body_inputs.push_back(source_output); + } else { + auto parameter = std::make_shared(input.get_element_type(), input.get_partial_shape()); + body_parameters.push_back(parameter); + body_parameters.back()->set_friendly_name(source_output.get_node()->get_friendly_name()); + body_inputs.push_back(parameter->output(0)); + + subgraph_inputs.push_back(source_output); + } + } + + auto body_node = node->copy_with_new_inputs(body_inputs); + body_node->set_friendly_name(node->get_friendly_name()); + + if (node->get_output_size() != body_node->get_output_size()) { + throw ngraph::ngraph_error("original node outputs size and extracted subgraph node outputs size doesn't much"); + } + + ngraph::ResultVector body_results; + for (auto output : node->outputs()) { + body_results.push_back(std::make_shared(body_node->output(output.get_index()))); + } + + auto body = create_body(node->get_friendly_name(), body_results, body_parameters); + auto subgraph = build_subgraph(node, subgraph_inputs, body); + + for (size_t i = 0; i < body->get_parameters().size(); i++) { + body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + if (subgraph->get_output_size() != body->get_results().size()) { + throw ngraph::ngraph_error("newly create subgraph doesn't much number of original node results"); + } + + return subgraph; +} + 
+std::shared_ptr snippets::op::Subgraph::make_canonical_from_this() { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::OutputVector subgraph_node_inputs; + for (auto input : this->input_values()) { + subgraph_node_inputs.push_back(input); + } + auto new_body = ngraph::clone_function(*this->get_body().get()); + auto snippet = std::make_shared(subgraph_node_inputs, new_body); + ngraph::copy_runtime_info(this->shared_from_this(), snippet); + snippet->set_friendly_name(this->get_friendly_name()); + snippet->set_generator(this->m_generator); + + return snippet; +} + +// We also can think of canonization as of pass to copy original subgraph and transforming it to canonical form suitable for code generation +// pass actual parameters and results shapes to generate for as well as channel mapping, +// we need to distinguish between 5d tensors that represents and somehow like locked dimensions +// ngraph::AxisVector to code +void snippets::op::Subgraph::canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) { + INTERNAL_OP_SCOPE(Subgraph); + NODE_VALIDATION_CHECK(this, input_shapes.size() == m_body->get_parameters().size(), + "Number of parameters for snippet doesn't much passed to generate method: ", input_shapes.size(), " vs ", m_body->get_parameters().size(), "."); + + NODE_VALIDATION_CHECK(this, output_shapes.size() == m_body->get_results().size(), + "number of results for snippet doesn't much passed to generate method: ", output_shapes.size(), " vs ", m_body->get_results().size(), "."); + + // replace only constants which are actually should be represented as scalars during code generation and probably move this step a bit later + for (auto op : m_body->get_ordered_ops()) { + if (auto constant = ngraph::as_type_ptr(op)) { + auto scalar = std::make_shared(*constant); + scalar->set_friendly_name(constant->get_friendly_name()); + ngraph::copy_runtime_info(constant, scalar); + ngraph::replace_node(constant, scalar); + } + } + + // repalace 
power with power static + for (auto op : m_body->get_ordered_ops()) { + if (auto power = ngraph::as_type_ptr(op)) { + if (ngraph::as_type_ptr(power->input(1).get_node()->shared_from_this())) { + auto power_static = std::make_shared( + power->input(0).get_source_output(), power->input(1).get_source_output(), power->get_autob()); + power_static->set_friendly_name(power->get_friendly_name()); + ngraph::copy_runtime_info(power, power_static); + ngraph::replace_node(power, power_static); + } + } + } + + + // it should be in subgraph node to be aligned with internal and external parameter list, but adding this for testing + // TODO: store blocking into to Parameter's rt_info for future propagation + for (size_t i = 0; i < m_body->get_parameters().size(); i++) { + auto param = m_body->get_parameters()[i]; + if (param->get_shape().size() < 4) { + std::vector shape(4, 1); + std::copy(param->get_shape().begin(), param->get_shape().end(), &shape.at(4 - (param->get_shape().size() == 0 ? 1 : param->get_shape().size())) ); + m_body->replace_parameter(i, std::make_shared(param->get_element_type(), ngraph::Shape(shape))); + } else if (param->get_shape().size() >= 4) { + if (param->get_element_type() != std::get<2>(input_shapes[i])) { + throw ngraph::ngraph_error("changes in presision. 
Is it legal??"); + } + if (param->get_shape().size() != std::get<0>(input_shapes[i]).size()) { + m_body->replace_parameter(i, std::make_shared(std::get<2>(input_shapes[i]), std::get<0>(input_shapes[i]))); + } + } + } + + m_body->validate_nodes_and_infer_types(); + + for (size_t i = 0; i < m_body->get_results().size(); i++) { + auto result = m_body->get_results()[i]; + PartialShape partial(result->get_shape()); + bool isCompatible = ngraph::PartialShape::broadcast_merge_into(partial, std::get<0>(output_shapes[i]), ::ngraph::op::AutoBroadcastSpec::NUMPY); + // equality check won't pass since we reshape without changes on external snippet edges + NODE_VALIDATION_CHECK(this, isCompatible, "Inferend and passed results shapes are difference for snippet : ", + result->get_shape(), " vs ", std::get<0>(output_shapes[i]), "."); + } +} + +void snippets::op::Subgraph::convert_to_snippet_dialect() { + INTERNAL_OP_SCOPE(Subgraph); + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(m_body); +} + +snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes) { + INTERNAL_OP_SCOPE(Subgraph); + NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); + + canonicalize(output_shapes, input_shapes); + convert_to_snippet_dialect(); + + // generation flow + snippets::pass::AssignRegisters().run_on_function(m_body); + + // actual code emission + ngraph::snippets::code ptr = m_generator->generate(m_body); + + // chack that body doesnt have constants for scheduling + std::vector> constants; + for (auto op : m_body->get_ordered_ops()) { + if (auto constant = as_type_ptr(op)) { + if (ngraph::shape_size(constant->get_shape()) != 1 && constant->get_shape() != Shape()) { + constants.push_back(constant); + } + } + } + NGRAPH_CHECK(!constants.size(), "External constants detected. 
Snippet is illigal for sheduling"); + + // check resulting shapes are broadcastable to each other so can be scheduled + Shape work_size = m_body->output(0).get_shape(); + for (size_t k = 0; k < m_body->get_output_size(); k++) { + auto shape = m_body->output(k).get_shape(); + + if (work_size.size() != shape.size()) { + throw ngraph_error("rank for all outputs of a snippet should match"); + } + + for (size_t i = 0; i < work_size.size(); i++) { + if (work_size[i] != shape[i]) { + if (work_size[i] == 1) { + work_size[i] = shape[i]; + } else { + throw ngraph_error("incompatible shapes for output graphs"); + } + } + } + } + + return {work_size, false /*canBeLinearized*/, ptr}; +} + +bool snippets::op::Subgraph::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { + INTERNAL_OP_SCOPE(Subgraph); + return m_body->evaluate(outputs, inputs); +} + +void snippets::op::Subgraph::print() const { + INTERNAL_OP_SCOPE(Subgraph); + remark(13) << "subgraph " << this->get_friendly_name() << " " + << this->get_type_name() + << " which contains " << this->get_body()->get_ops().size() << " nodes" << std::endl; + + int qqq = 0; + for (auto op : this->get_body()->get_ordered_ops()) { + remark(13) << "op " << qqq++ << " " << op->get_friendly_name() << " (" << op->get_type_name() << ") " << op << std::endl; + } + + for (auto& in : this->inputs()) { + remark(13) << " -> " << in.get_source_output().get_node_shared_ptr()->get_friendly_name() << " " + << in.get_source_output().get_node_shared_ptr() << std::endl; + } + + for (auto& out : this->outputs()) { + for (auto& user : out.get_target_inputs()) { + remark(13) << " <- " << user.get_node()->get_friendly_name() << " " << user.get_node() << std::endl; + } + remark(13) << std::endl; + } +} + +void snippets::op::Subgraph::print_statistics(bool verbose) { + INTERNAL_OP_SCOPE(Subgraph); + auto getNodeInventory = [](std::shared_ptr n) -> size_t { + size_t total = 0; + + for (auto input : n->inputs()) { + total += 
input.get_tensor().size(); + } + + for (auto output : n->outputs()) { + total += output.get_tensor().size(); + } + + if (auto subgraph = ngraph::as_type_ptr(n)) { + for (auto op : subgraph->get_body()->get_ordered_ops()) { + if (ngraph::as_type_ptr(op)) { + total += op->output(0).get_tensor().size(); + } + } + } + + return total; + }; + + auto getFunctionInventory = [getNodeInventory](std::shared_ptr f) -> size_t { + size_t total = 0; + for (auto op : f->get_ordered_ops()) { + // Results and parameters are artificially introduced, + // while Constants are already considered if they are inputs of other operation + // this should lead to 1:1 inventory for single node operations + if (!ngraph::as_type_ptr(op) + && !ngraph::as_type_ptr(op) + && !ngraph::as_type_ptr(op)) { + total += getNodeInventory(op); + } + } + return total; + }; + + auto countConstants = [](std::shared_ptr f) -> size_t { + size_t count = 0; + for (auto op : f->get_ordered_ops()) { + count += !!ngraph::as_type_ptr(op) ? 1 : 0; + } + return count; + }; + + auto body = this->get_body(); + + std::cout << this->get_friendly_name() + << ";" << this + << ";" << body->get_ops().size() + << ";" << body->get_parameters().size() + << ";" << body->get_results().size() + << ";" << countConstants(body) + << ";" << getFunctionInventory(body) + << ";" << getNodeInventory(this->shared_from_this()) << std::endl; + + if (verbose) { + this->print(); + } +} diff --git a/inference-engine/src/snippets/src/op/vectorload.cpp b/inference-engine/src/snippets/src/op/vectorload.cpp new file mode 100644 index 00000000000..346767f9a9c --- /dev/null +++ b/inference-engine/src/snippets/src/op/vectorload.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/vectorload.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::VectorLoad, "VectorLoad", 0); + +snippets::op::VectorLoad::VectorLoad(const Output& x) : Load(x) { +} diff --git 
a/inference-engine/src/snippets/src/op/vectorstore.cpp b/inference-engine/src/snippets/src/op/vectorstore.cpp new file mode 100644 index 00000000000..116b071a0ce --- /dev/null +++ b/inference-engine/src/snippets/src/op/vectorstore.cpp @@ -0,0 +1,12 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/op/vectorstore.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(snippets::op::VectorStore, "VectorStore", 0); + +snippets::op::VectorStore::VectorStore(const Output& x) : Store(x) { +} diff --git a/inference-engine/src/snippets/src/pass/assign_registers.cpp b/inference-engine/src/snippets/src/pass/assign_registers.cpp new file mode 100644 index 00000000000..6abdf07d72b --- /dev/null +++ b/inference-engine/src/snippets/src/pass/assign_registers.cpp @@ -0,0 +1,183 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// #include +#include "itt.hpp" +#include "remarks.hpp" + +#include "snippets/pass/assign_registers.hpp" +#include "snippets/register_info.hpp" +#include "snippets/snippets_isa.hpp" + +#include + +#include + +bool ngraph::snippets::pass::AssignRegisters::run_on_function(std::shared_ptr f) { + RUN_ON_FUNCTION_SCOPE(AssignRegisters); + int reg64_tmp_start { 8 }; // R8, R9, R10, R11, R12, R13, R14, R15 inputs+outputs+1 + using Reg = size_t; + auto ops = f->get_ordered_ops(); + decltype(ops) stmts; + std::copy_if(ops.begin(), ops.end(), std::back_inserter(stmts), [](decltype(ops[0]) op) { + return !(std::dynamic_pointer_cast(op) || std::dynamic_pointer_cast(op)); + }); + + size_t rdx = 0; + std::map, Reg> regs; + for (auto op : stmts) { + for (auto output : op->outputs()) { + regs[output.get_tensor_ptr()] = rdx++; + } + } + + std::vector> used; + std::vector> def; + + for (auto op : stmts) { + std::set u; + for (auto input : op->inputs()) { + if (regs.count(input.get_tensor_ptr())) { + u.insert(regs[input.get_tensor_ptr()]); + } + } + used.push_back(u); + + 
std::set d; + if (!std::dynamic_pointer_cast(op)) { + for (auto output : op->outputs()) { + d.insert(regs[output.get_tensor_ptr()]); + } + } + def.push_back(d); + } + + // define life intervals + std::vector> lifeIn(stmts.size(), std::set()); + std::vector> lifeOut(stmts.size(), std::set()); + + for (size_t i = 0; i < stmts.size(); i++) { + for (size_t n = 0; n < stmts.size(); n++) { + std::set_difference(lifeOut[n].begin(), lifeOut[n].end(), def[n].begin(), def[n].end(), std::inserter(lifeIn[n], lifeIn[n].begin())); + lifeIn[n].insert(used[n].begin(), used[n].end()); + } + for (size_t n = 0; n < stmts.size(); n++) { + auto node = stmts[n]; + if (!std::dynamic_pointer_cast(node)) { + for (auto out : node->outputs()) { + for (auto port : out.get_target_inputs()) { + auto pos = std::find(stmts.begin(), stmts.end(), port.get_node()->shared_from_this()); + if (pos != stmts.end()) { + auto k = pos-stmts.begin(); + lifeOut[n].insert(lifeIn[k].begin(), lifeIn[k].end()); + } + } + } + } + } + } + + struct by_starting { + auto operator()(const std::pair& lhs, const std::pair& rhs) const -> bool { + return lhs.first < rhs.first|| (lhs.first == rhs.first && lhs.second < rhs.second); + } + }; + + struct by_ending { + auto operator()(const std::pair& lhs, const std::pair& rhs) const -> bool { + return lhs.second < rhs.second || (lhs.second == rhs.second && lhs.first < rhs.first); + } + }; + + std::set, by_starting> live_intervals; + + std::reverse(lifeIn.begin(), lifeIn.end()); + auto find_last_use = [lifeIn](int i) -> int { + int ln = lifeIn.size()-1; + for (auto& x : lifeIn) { + if (x.find(i) != x.end()) { + return ln; + } + ln--; + } + return i; + }; + + for (size_t i = 0; i < stmts.size(); i++) { + live_intervals.insert(std::make_pair(i, find_last_use(i))); + } + + // http://web.cs.ucla.edu/~palsberg/course/cs132/linearscan.pdf + std::multiset, by_ending> active; + std::map register_map; + std::stack bank; + for (int i = 0; i < 16; i++) bank.push(16-1-i); + + for (auto 
interval : live_intervals) { + // check expired + while (!active.empty()) { + auto x = *active.begin(); + if (x.second >= interval.first) { + break; + } + active.erase(x); + bank.push(register_map[x.first]); + } + // allocate + if (active.size() == 16) { + throw ngraph_error("caanot allocate registers for a snippet "); + } else { + register_map[interval.first] = bank.top(); + bank.pop(); + active.insert(interval); + } + } + + std::map, Reg> physical_regs; + + for (auto reg : regs) { + physical_regs[reg.first] = register_map[reg.second]; + } + + size_t constantID = 0; + + for (auto n : f->get_ordered_ops()) { + auto& rt = n->get_rt_info(); + // nothing to do for function signature + if (std::dynamic_pointer_cast(n) || std::dynamic_pointer_cast(n)) { + continue; + } + + // store only effective address + if (auto result = std::dynamic_pointer_cast(n)) { + auto ea = reg64_tmp_start+static_cast(f->get_result_index(result) + f->get_parameters().size()); + rt["effectiveAddress"] = std::make_shared>(VariantWrapper(ea)); + continue; + } + // store effective address and procced with vector registers + if (as_type_ptr(n) || as_type_ptr(n)) { + auto source = n->get_input_source_output(0).get_node_shared_ptr(); + + if (auto param = as_type_ptr(source)) { + auto ea = reg64_tmp_start+static_cast(f->get_parameter_index(param)); + rt["effectiveAddress"] = std::make_shared>(VariantWrapper(ea)); + } else if (auto constant = as_type_ptr(source)) { + auto ea = reg64_tmp_start+static_cast(f->get_parameters().size() + f->get_results().size() + 1 + constantID); + rt["effectiveAddress"] = std::make_shared>(VariantWrapper(ea)); + constantID++; + } else { + throw ngraph_error("load/broadcast should follow only Parameter or non-Scalar constant"); + } + } + + std::vector regs; regs.reserve(n->outputs().size()); + for (auto output : n->outputs()) { + auto allocated = physical_regs[output.get_tensor_ptr()]; + regs.push_back(allocated); + } + rt["reginfo"] = 
std::make_shared>>(VariantWrapper>(regs)); + } + + return false; +} diff --git a/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp b/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp new file mode 100644 index 00000000000..2b08eea16a5 --- /dev/null +++ b/inference-engine/src/snippets/src/pass/collapse_subgraph.cpp @@ -0,0 +1,516 @@ +// Copyright (C) 2018-2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "remarks.hpp" +#include "itt.hpp" + +#include "snippets/pass/collapse_subgraph.hpp" +#include "snippets/op/subgraph.hpp" + +#include +#include +#include + + +#include +#include +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::StartSubgraph, "CollapseSubgraph", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::AttachToSubgraph, "CollapseSubgraph", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::TokenizeSnippets, "CollapseSubgraph", 0); + +using namespace ngraph; +using namespace snippets; + +namespace { + +auto outputs_are_not_broadcastable(const std::shared_ptr& node) -> bool { + auto outputs = node->outputs(); + auto find_smallest_output_shape = [](const std::vector>& outputs) -> ngraph::Shape { + return std::accumulate(std::begin(outputs), std::end(outputs), ngraph::Shape(outputs.begin()->get_shape()), + [](ngraph::Shape other_shape, ngraph::Output output){ + return ngraph::shape_size(output.get_shape()) < ngraph::shape_size(other_shape) ? 
output.get_shape() : other_shape; + }); + }; + auto ref_shape = find_smallest_output_shape(outputs); + + auto check_shapes_broadcastable = [ref_shape](const ngraph::Output& output) -> bool { + auto other_shape = output.get_shape(); + + if (other_shape.size() != ref_shape.size()) { + return false; + } + + return std::inner_product(std::begin(other_shape), std::end(other_shape), std::begin(ref_shape), true, + std::logical_and(), [](ngraph::Shape::value_type lsh, ngraph::Shape::value_type rsh){ + return rsh == 1 || lsh == rsh; + }); + }; + + return std::find_if_not(std::begin(outputs), std::end(outputs), check_shapes_broadcastable) != std::end(outputs); +}; + +auto has_cycles_of_dependencies(const std::vector>>& results, + const std::vector>& inputs) -> bool { + auto BFS_from_to = [](ngraph::Node* from, ngraph::Node* to) -> bool { + std::unordered_set visited; + std::queue stack; + stack.push(from); + + while (stack.size() > 0) { + ngraph::Node* curr = stack.front(); + visited.insert(curr); + + if (ngraph::op::is_output(curr)) { + return false; + } + + stack.pop(); + + if (curr != to) { + for (const auto& next : curr->get_users()) { + if (visited.count(next.get()) == 0) { + stack.push(next.get()); + } + } + } else { + return true; + } + } + return false; + }; + + for (auto& result : results) { + for (auto& user : result) { + for (auto& input : inputs) { + auto source = input.get_source_output().get_node(); + auto containsLoop = BFS_from_to(user.get_node(), source); + + remark(1) << "checking path from " + << user.get_node()->get_friendly_name() + << " to " << source->get_friendly_name() + << " resulted in " << containsLoop << std::endl; + + if (containsLoop) { + return true; + } + } + } + } + return false; +} + +auto has_subgraph_as_input(std::shared_ptr node) -> bool { + auto inputs = node->inputs(); + for (auto input : inputs) { + auto parent = input.get_source_output().get_node_shared_ptr(); + if (!!as_type_ptr(parent)) { + return true; + } + } + return false; +}; 
+ +auto is_lo(std::shared_ptr n) -> bool { + auto is_lob = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n); + }; + + auto is_lou = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) ? + || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + // || !!as_type_ptr(n) ? + // || !!as_type_ptr(n) ? + || !!as_type_ptr(n) + || !!as_type_ptr(n) + || !!as_type_ptr(n) + // || !!as_type_ptr(n) ? 
+ || !!as_type_ptr(n) + // || !!as_type_ptr(n) + // || !!as_type_ptr(n) + || !!as_type_ptr(n) + // || !!as_type_ptr(n) + || !!as_type_ptr(n); + }; + + auto is_lot = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return false; + // return !!as_type_ptr(n) // ternary with 2 constants + // || !!as_type_ptr(n); // ternary with 2 constants / or DW + }; + + auto is_fq = [](std::shared_ptr n) -> bool { + using ngraph::as_type_ptr; + return false;//!!as_type_ptr(n); // 4->1 + }; + + return is_lou(n) || is_lob(n) ||is_lot(n) || is_fq(n); +} + +auto has_supported_in_out(std::shared_ptr n) -> bool { + for (auto in : n->inputs()) { + if (in.get_tensor().get_element_type() != ngraph::element::f32) { + return false; + } + } + + for (auto out : n->outputs()) { + if (out.get_tensor().get_element_type() != ngraph::element::f32) { + return false; + } + + for (auto in_out : out.get_target_inputs()) { + if (!!as_type_ptr(in_out.get_node()->shared_from_this())) { + return false; + } + } + } + + return true; +}; + +} // namespace + +ngraph::snippets::pass::StartSubgraph::StartSubgraph(bool tokenize_by_node) : MatcherPass() { + MATCHER_SCOPE(StartSubgraph); + + auto has_multiple_output_edges = [](std::shared_ptr n) -> bool { + for (auto out : n->outputs()) { + if (out.get_target_inputs().size() != 1) return true; + } + + return false; + }; + + register_matcher(std::make_shared( + std::make_shared(pattern::any_input(), + [tokenize_by_node, has_multiple_output_edges](std::shared_ptr n) { + return is_lo(n) && + has_supported_in_out(n) && + (tokenize_by_node || !has_subgraph_as_input(n)) && + has_multiple_output_edges(n); + })), + [](ngraph::pattern::Matcher &m) -> bool { + auto node = m.get_match_root(); + + remark(1) << "Match root" + << node->get_friendly_name() + << " " << node + << " Creating new snippet - no input subgraphs found" << std::endl; + + auto subgraph = op::Subgraph::wrap_node_as_subgraph(node); + ngraph::replace_node(node, subgraph); + + remark(1) << 
"Replacement (new) done for: " + << subgraph->get_friendly_name() + << " with " << subgraph->inputs().size() + << " inputs and " << subgraph->outputs().size() + << " outputs and " << subgraph->get_body()->get_ops().size() << " ops total\n"; + return true; + }); +} + +ngraph::snippets::pass::AttachToSubgraph::AttachToSubgraph(bool tokenize_by_node) : MatcherPass() { + MATCHER_SCOPE(AttachToSubgraph); + enum continuation_strategy { + reset, + abort + }; + + continuation_strategy strategy = continuation_strategy::abort; + + ngraph::graph_rewrite_callback continuation_callback = [strategy](ngraph::pattern::Matcher &m) -> bool { + auto node = m.get_match_root(); + + remark(1) << "Match root " << node->get_friendly_name() << " " << node << std::endl; + + // inputs that are already subgraphs + std::unordered_set> input_subgraphs; + // clone bodies because we need a rollback if loop is found + std::map, std::shared_ptr> clones; + + ParameterVector body_parameters; + OutputVector external_inputs; + OutputVector internal_inputs; + + auto inputs = node->inputs(); + + auto is_recurrent = [inputs](const ngraph::Output& to_find) -> bool { + for (auto in : inputs) { + if (in.get_source_output().get_node_shared_ptr() == to_find.get_node_shared_ptr()) { + return true; + } + } + return false; + }; + + auto get_input_index = [](const Output& found) -> size_t { + for (auto& input : found.get_target_inputs()) { + remark(13) << input.get_node() << " " << input.get_source_output() << " vs " + << found << found.get_node() << " : " << input.get_index() << " " << found.get_index() << std::endl; + } + + for (auto& input : found.get_target_inputs()) { + remark(13) << input.get_node() << " " << input.get_source_output() << " vs " + << found << " : " << input.get_index() << " " << found.get_index() << std::endl; + if (as_type_ptr(input.get_node()->shared_from_this()) != nullptr && input.get_source_output() == found) { + return input.get_index(); + } + } + return 0; + }; + + for (auto input : 
inputs) { + auto input_node = input.get_source_output().get_node_shared_ptr(); + + if (auto subgraph = as_type_ptr(input_node)) { + if (!clones.count(input_node)) { + auto f = ngraph::clone_function(*subgraph->get_body().get()); + f->set_friendly_name(subgraph->get_body()->get_friendly_name()); + clones[input_node] = f; + } + } + } + + for (auto input : inputs) { + auto input_node = input.get_source_output().get_node_shared_ptr(); + + if (auto subgraph = as_type_ptr(input_node)) { + if (!input_subgraphs.count(input_node)) { + input_subgraphs.insert(input_node); + + auto f = clones[input_node]; + const auto& input_body_parameters = f->get_parameters(); + + for (size_t i = 0; i < input_body_parameters.size(); ++i) { + auto found = std::find(external_inputs.begin(), external_inputs.end(), subgraph->input_value(i)); + if (found != external_inputs.end()) { + auto current_input_index = get_input_index(*found); + // Handling the case if multiple inputs referencing the same parameter comes from one subgraph => it's not introduced by SS. 
+ // It might be better to keep track if body parameter relationship rather than that + if (current_input_index < body_parameters.size()) { + remark(13) << "replacing " << *found << " " << current_input_index << " with " + << body_parameters[current_input_index] << std::endl; + f->replace_parameter(i, body_parameters[current_input_index]); + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } else if (is_recurrent(subgraph->input_value(i))) { + remark(13) << "ternary merge is conducted " << subgraph->input_value(i).get_node_shared_ptr() << std::endl; + + auto internal = input_body_parameters[i]; + auto internal_consumers = internal->outputs(); + + for (auto output : internal->outputs()) { + for (auto consumer : output.get_target_inputs()) { + if (auto to_replace_with = as_type_ptr(subgraph->input_value(i).get_node_shared_ptr())) { + auto other_body = clones[subgraph->input_value(i).get_node_shared_ptr()]; + auto other_body_result = other_body->get_results()[consumer.get_source_output().get_index()]; + auto result_producer = other_body_result->input(0).get_source_output(); + + consumer.replace_source_output(result_producer.get_node_shared_ptr()); + } + } + } + } else { + external_inputs.push_back(subgraph->input_value(i)); + body_parameters.push_back(input_body_parameters[i]); + } + } + } + + // this is there stitching happens, get result of a copy of a body of currently processed input and put it to the new inputs + // internal output index == external output index + auto& input_body = clones[input_node]; + size_t source_output_index = input.get_source_output().get_index(); + auto source_result = input_body->get_results()[source_output_index]; + // Result op has a single input + internal_inputs.push_back(source_result->input_value(0)); + } else { + if (op::is_scalar_constant(input_node)) { + internal_inputs.push_back(input_node->output(0)); + } else { + 
external_inputs.push_back(input.get_source_output()); + auto new_parameter = std::make_shared(input.get_element_type(), input.get_partial_shape()); + new_parameter->set_friendly_name(input.get_source_output().get_node()->get_friendly_name()); + body_parameters.push_back(new_parameter); + body_parameters.back()->set_friendly_name(input.get_source_output().get_node()->get_friendly_name()); + internal_inputs.push_back(new_parameter->output(0)); + } + } + } + + auto body_node = node->copy_with_new_inputs(internal_inputs); + body_node->set_friendly_name(node->get_friendly_name()); + + remark(1) << "Original node outputs = " << node->get_output_size() + << " body node outputs = " << body_node->get_output_size() << std::endl; + + if (node->get_output_size() != body_node->get_output_size()) { + throw ngraph_error("original node outputs size and extracted node outputs size doesn't much"); + } + + ResultVector body_results; + std::vector>> subgraph_result_inputs; + + for (auto subgraph : input_subgraphs) { + for (auto output : subgraph->outputs()) { + bool first_side_consumer = true; + + for (auto target_input : output.get_target_inputs()) { + auto target_node = target_input.get_node()->shared_from_this(); + + if (input_subgraphs.count(target_node)) { + remark(13) << "ternary merge is conducted " << subgraph << " -> " << target_node << std::endl; + } + + if (!input_subgraphs.count(target_node) && target_node != node) { + if (first_side_consumer) { + auto& input_subgraph_body = clones[subgraph]; + body_results.push_back(std::make_shared(input_subgraph_body->get_results()[output.get_index()]->input_value(0))); + subgraph_result_inputs.push_back({}); + + first_side_consumer = false; + } + + if (!!subgraph_result_inputs.back().count(target_input)) { + throw ngraph_error("target input added twice!!!"); + } + // save target input port outside the body + subgraph_result_inputs.back().insert(target_input); + } + } + } + } + + for (auto output : node->outputs()) { + 
body_results.push_back(std::make_shared(body_node->output(output.get_index()))); + subgraph_result_inputs.push_back(output.get_target_inputs()); + } + + if (body_results.size() != subgraph_result_inputs.size()) { + throw ngraph_error("body results and node results size mismatch during subgraph collapse"); + } + + if (body_parameters.size() + body_results.size() > 7) { + if (strategy == continuation_strategy::reset) { + remark(13) << "new subgraph is created. Impossible to schedule subgraph with " + << body_parameters.size() << " inputs and " << body_results.size() << " outputs." << std::endl; + + auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node); + ngraph::replace_node(node, single_node_subgraph); + return true; + } else { + return false; + } + } + + auto body = op::create_body(node->get_friendly_name(), body_results, body_parameters); + for (size_t i = 0; i < body->get_parameters().size(); i++) { + body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + auto subgraph = op::build_subgraph(node, external_inputs, body); + auto act_body = subgraph->get_body(); + for (size_t i = 0; i < act_body->get_parameters().size(); i++) { + act_body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + if (subgraph->get_output_size() != subgraph_result_inputs.size()) { + throw ngraph_error("newly created subgraph doesn't match number of results"); + } + + if (outputs_are_not_broadcastable(subgraph)) { + if (strategy == continuation_strategy::reset) { + remark(13) << "New subgraph is created due to outputs of a subgraph not broadcastable." 
<< std::endl; + + auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node); + single_node_subgraph->validate_and_infer_types(); + ngraph::replace_node(node, single_node_subgraph); + return true; + } else { + return false; + } + } + + if (has_cycles_of_dependencies(subgraph_result_inputs, subgraph->inputs())) { + if (strategy == continuation_strategy::reset) { + remark(13) << "New subgraph is created due to loop dependency introduced by one of input subgraphs." << std::endl; + + auto single_node_subgraph = op::Subgraph::wrap_node_as_subgraph(node); + single_node_subgraph->validate_and_infer_types(); + ngraph::replace_node(node, single_node_subgraph); + return true; + } else { + return false; + } + } + + for (size_t i = 0; i < subgraph->get_output_size(); ++i) { + for (auto target_input : subgraph_result_inputs[i]) { + target_input.replace_source_output(subgraph->output(i)); + } + } + + subgraph->validate_and_infer_types(); + + auto act_body1 = subgraph->get_body(); + for (size_t i = 0; i < act_body1->get_parameters().size(); i++) { + act_body1->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name()); + } + + remark(1) << "Replacement (merge) done for: " + << subgraph->get_friendly_name() + << " with " << subgraph->inputs().size() + << " inputs and " << subgraph->outputs().size() + << " outputs and " << subgraph->get_body()->get_ops().size() << " ops total\n"; + + return true; + }; + + register_matcher(std::make_shared( + std::make_shared(pattern::any_input(), + [](std::shared_ptr n) { + return is_lo(n) && has_supported_in_out(n) && has_subgraph_as_input(n); + })), + continuation_callback); +} diff --git a/inference-engine/src/snippets/src/pass/insert_load_store.cpp b/inference-engine/src/snippets/src/pass/insert_load_store.cpp new file mode 100644 index 00000000000..d1bfec5a0bf --- /dev/null +++ b/inference-engine/src/snippets/src/pass/insert_load_store.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2020 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "remarks.hpp" + +#include "snippets/pass/insert_load_store.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include +#include + +ngraph::snippets::pass::InsertLoad::InsertLoad() { + MATCHER_SCOPE(InsertLoad); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + + // check if already has Load as an output + for (auto output : root->outputs()) { + for (auto consumer : output.get_target_inputs()) { + if (dynamic_cast(consumer.get_node())) { + return false; + } + } + } + + auto load = std::make_shared (root); + ngraph::copy_runtime_info(root, load); + + bool rewritten = false; + for (auto output : root->outputs()) { + for (auto consumer : output.get_target_inputs()) { + if (consumer.get_node()->shared_from_this() != load) { + consumer.replace_source_output(load); + rewritten |= true; + } + } + } + + return rewritten; + }); +} + +ngraph::snippets::pass::InsertStore::InsertStore() { + MATCHER_SCOPE(InsertStore); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + + // check if already has Store as an input + for (auto input : root->inputs()) { + if (dynamic_cast(input.get_source_output().get_node())) { + return false; + } + } + + auto store = std::make_shared (root->input_value(0)); + ngraph::copy_runtime_info(root, store); + root->set_argument(0, store); + return true; + }); +} diff --git a/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp b/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp new file mode 100644 index 00000000000..91ddc2096de --- /dev/null +++ b/inference-engine/src/snippets/src/pass/insert_movebroadcast.cpp @@ -0,0 +1,177 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remarks.hpp" +#include "itt.hpp" + 
+#include "snippets/pass/insert_movebroadcast.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include + +#include +#include + +using namespace ngraph; + +static std::shared_ptr numpy_broadcast_node(const ngraph::Output& value, + const ngraph::Shape& output_shape, const ngraph::Shape& source_shape) { + std::shared_ptr broadcasted_node = value.get_node_shared_ptr(); + + if (output_shape == value.get_shape()) { + return broadcasted_node; + } + + NGRAPH_CHECK(source_shape.size() == output_shape.size(), + "Ranks of source_shape and output_shape don't match: ", + source_shape.size(), + " vs ", + output_shape.size()); + + ngraph::AxisVector broadcast_axes; + ngraph::Shape squeezed_shape; + for (size_t index = 0; index < output_shape.size(); ++index) { + if (source_shape.at(index) == 1 && output_shape.at(index) != 1) { + broadcast_axes.push_back(index); + } else { + squeezed_shape.push_back(source_shape.at(index)); + } + } + + remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name() + << " " << broadcast_axes << " " << broadcasted_node->get_shape() << " -> " << output_shape << std::endl; + + // it shouldn't be a problem for now since we don't consider StridedSlice and Broadcast here + if (auto constant = ngraph::as_type_ptr(broadcasted_node)) { + if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) { + remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name() + << " to scalar constant " << constant->get_shape() << " -- aborting!" << std::endl; + + return broadcasted_node; + } + } + + if (auto constant = ngraph::as_type_ptr(broadcasted_node)) { + if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) { + remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name() + << " to scalar constant " << constant->get_shape() << " -- aborting!" 
<< std::endl; + + return broadcasted_node; + } + } + + if (!broadcast_axes.empty()) { + // ShapeOf + broadcasted_node = std::make_shared(broadcasted_node, output_shape); + } + + return broadcasted_node; +} + +static ngraph::Shape calculate_broadcast_shape(ngraph::Shape lhs_shape, ngraph::Shape rhs_shape) { + ngraph::Shape result; + auto lhs_rank = lhs_shape.size(); + auto rhs_rank = rhs_shape.size(); + auto max_rank = std::max(lhs_rank, rhs_rank); + + // left-pad the lhs_shape with ones + lhs_shape.insert(begin(lhs_shape), max_rank - lhs_rank, 1); + // left-pad the rhs_shape with ones + rhs_shape.insert(begin(rhs_shape), max_rank - rhs_rank, 1); + + for (size_t index = 0; index < max_rank; ++index) { + size_t lhs_dim = lhs_shape.at(index); + size_t rhs_dim = rhs_shape.at(index); + + if (lhs_dim != rhs_dim && lhs_dim != 1 && rhs_dim != 1) { + throw ngraph::ngraph_error("incompatible shapes"); + } + + result.push_back(std::max(lhs_dim, rhs_dim)); + } + return result; +} + +std::pair> get_numpy_broadcast_shapes(const std::vector& input_shapes) { + ngraph::Shape target_shape = std::accumulate(begin(input_shapes), end(input_shapes), ngraph::Shape{}, calculate_broadcast_shape); + + std::vector full_shapes; + for (const ngraph::Shape& input : input_shapes) { + ngraph::Shape padded_shape{input}; + padded_shape.insert(begin(padded_shape), target_shape.size() - padded_shape.size(), 1); + full_shapes.push_back(move(padded_shape)); + } + + return {target_shape, full_shapes}; +} + +auto reset_broacast_config(const std::shared_ptr& op) -> void { + using namespace ngraph; + + bool is_scalar = false; + for (auto input : op->inputs()) { + if (input.get_shape() == Shape() || ngraph::shape_size(input.get_shape()) == 1) { + is_scalar = true; + } + } + + if (!is_scalar) { + if (auto binary = std::dynamic_pointer_cast(op)) { + binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE); + } else if (auto binary = std::dynamic_pointer_cast(op)) { + 
binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE); + } else if (auto binary = std::dynamic_pointer_cast(op)) { + binary->set_autob(ngraph::op::AutoBroadcastSpec::NONE); + } + } +} + +// adds explicit broadcasts if needed +// ToDO: this indeed make model not reshapable, need to come up with more clever way to insert fake broadcast, +// well on the other hand, if we replace scalar constant with Scalar op / or ShapeOf, we could have broadcasts that are reshapable +// TODO: generate FakeBroadcast if and only if broadcast is done by w dimension +ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() { + MATCHER_SCOPE(InsertMoveBroadcast); + ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + const auto& values = root->input_values(); + if (values.empty()) { + return false; + } + + std::vector input_shapes; + for (const auto& input : values) { + input_shapes.push_back(input.get_shape()); + } + + // find the output tensor's shape, then broadcast all inputs so that they are compatible + auto bcast_shapes = get_numpy_broadcast_shapes(input_shapes); + + ngraph::OutputVector broadcasted_inputs; + for (size_t i = 0; i < values.size(); ++i) { + auto node = numpy_broadcast_node(values[i], bcast_shapes.first, bcast_shapes.second[i]); + ngraph::copy_runtime_info(root, node); + broadcasted_inputs.push_back(node); + } + + auto new_args = ngraph::as_node_vector(broadcasted_inputs); + for (size_t i = 0; i < new_args.size(); i++) { + root->input(i).replace_source_output(new_args[i]->output(0)); + } + + reset_broacast_config(root); + + return true; + }; + + // only numpy broadcast type is supported currently + auto any = std::make_shared(pattern::any_input(), + [](std::shared_ptr n) { + // should add supports_auto_broadcast to SquaredDifference + return (ngraph::op::supports_auto_broadcast(n) || !!as_type_ptr(n) || !!as_type_ptr(n)) + && n->get_autob().m_type == ngraph::op::AutoBroadcastType::NUMPY; }); + + 
register_matcher(std::make_shared(any), callback); +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp b/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp new file mode 100644 index 00000000000..33451846d4b --- /dev/null +++ b/inference-engine/src/snippets/src/pass/load_movebroadcast_to_broadcastload.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remarks.hpp" +#include "itt.hpp" + +#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include +#include + +#include + +ngraph::snippets::pass::LoadMoveBroadcastToBroadcastLoad::LoadMoveBroadcastToBroadcastLoad() { + MATCHER_SCOPE(LoadMoveBroadcastToBroadcastLoad); + auto param_pattern = ngraph::pattern::wrap_type(); + auto load_pattern = std::make_shared(param_pattern); + auto fbn = std::make_shared(load_pattern, Shape{1}); + + register_matcher(std::make_shared(fbn), + [load_pattern, param_pattern](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + + const auto &pm = m.get_pattern_value_map(); + const auto input = pm.at(load_pattern).get_node_shared_ptr(); + const auto param = pm.at(param_pattern).get_node_shared_ptr(); + + // check if load has more than 1 user to avoid load+broadcast load on the same parameter + if (input->output(0).get_target_inputs().size() != 1) { + return false; + } + + if (root->inputs().size() != 1 || input->inputs().size() != 1) { + throw ngraph_error("cannot rewrite Broadcast load with more than one input"); + } + + auto inshape = root->input(0).get_shape(); + auto outshape = root->output(0).get_shape(); + auto broadcastload = std::make_shared(param, outshape); + Shape bct(inshape.size(), 0); + for (size_t k = 0; k < inshape.size(); k++) { + if (inshape[k] != outshape[k] && inshape[k] == 1) { + bct[k] = 1; + } + } + + 
broadcastload->set_broadcast_info(bct); + if (broadcastload->is_broadcast(outshape.size()-1)) { + ngraph::copy_runtime_info(root, broadcastload); + ngraph::replace_node(root, broadcastload); + return true; + } else { + return false; + } + }); +} \ No newline at end of file diff --git a/inference-engine/src/snippets/src/pass/vector_to_scalar.cpp b/inference-engine/src/snippets/src/pass/vector_to_scalar.cpp new file mode 100644 index 00000000000..64b96d118b1 --- /dev/null +++ b/inference-engine/src/snippets/src/pass/vector_to_scalar.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" + +#include "snippets/pass/vector_to_scalar.hpp" +#include "snippets/snippets_isa.hpp" + +#include +#include +#include + +ngraph::snippets::pass::ReplaceLoadsWithScalarLoads::ReplaceLoadsWithScalarLoads() { + MATCHER_SCOPE(ReplaceLoadsWithScalarLoads); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + auto load = std::make_shared (root->input_value(0)); + load->set_friendly_name(root->get_friendly_name()); + ngraph::copy_runtime_info(root, load); + ngraph::replace_node(root, load); + return true; + }); +} + +ngraph::snippets::pass::ReplaceStoresWithScalarStores::ReplaceStoresWithScalarStores() { + MATCHER_SCOPE(ReplaceStoresWithScalarStores); + register_matcher(std::make_shared( + ngraph::pattern::wrap_type()), + [this](ngraph::pattern::Matcher &m) { + auto root = m.get_match_root(); + auto store = std::make_shared (root->input_value(0)); + store->set_friendly_name(root->get_friendly_name()); + ngraph::copy_runtime_info(root, store); + ngraph::replace_node(root, store); + return true; + }); +} diff --git a/inference-engine/src/snippets/src/register_info.cpp b/inference-engine/src/snippets/src/register_info.cpp new file mode 100644 index 00000000000..1f9abbd7267 --- /dev/null +++ 
b/inference-engine/src/snippets/src/register_info.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/register_info.hpp" + +template class ngraph::VariantImpl>; + +constexpr ngraph::VariantTypeInfo ngraph::VariantWrapper>::type_info; diff --git a/inference-engine/src/snippets/src/remarks.hpp b/inference-engine/src/snippets/src/remarks.hpp new file mode 100644 index 00000000000..0b7d5cebce7 --- /dev/null +++ b/inference-engine/src/snippets/src/remarks.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +class logstreambuf: public std::streambuf { +public: + static const int threshold {5}; +}; + +template +static inline auto remark(T x) -> std::ostream& { + static logstreambuf nostreambuf; + static std::ostream nocout(&nostreambuf); + + return ((x >= logstreambuf::threshold)? std::cout << "Remark: " : nocout); +} diff --git a/inference-engine/tests/functional/inference_engine/CMakeLists.txt b/inference-engine/tests/functional/inference_engine/CMakeLists.txt index a9fe8c9b05a..492fad7ab5a 100644 --- a/inference-engine/tests/functional/inference_engine/CMakeLists.txt +++ b/inference-engine/tests/functional/inference_engine/CMakeLists.txt @@ -16,6 +16,7 @@ set(LINK_LIBRARIES openvino::itt openvino::conditional_compilation sharedTestClasses + inference_engine_snippets ) set(DEPENDENCIES diff --git a/inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp b/inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp new file mode 100644 index 00000000000..c89894afbf9 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/broadcast_fusion.cpp @@ -0,0 +1,135 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include 
"common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, FuseLoadWithBroadcastMoveByX) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 1}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto bct = std::make_shared(load0, load1->get_shape()); + auto add = std::make_shared(bct, load1); + auto store = std::make_shared(add); + f = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 1}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + auto load0 = std::make_shared(data0, data1->get_shape()); + auto load1 = std::make_shared(data1); + auto add = std::make_shared(load0, load1); + auto store = std::make_shared(add); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, NotFuseLoadWithBroadcastMoveByY) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{1, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto bct = std::make_shared(load0, load1->get_shape()); + auto add = std::make_shared(bct, load1); + auto store = std::make_shared(add); + f = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{1, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 2}); + 
auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto bct = std::make_shared(load0, load1->get_shape()); + auto add = std::make_shared(bct, load1); + auto store = std::make_shared(add); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, NoFuseLoadWithBroadcastMoveMultipleUsers) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 1}); + auto data2 = std::make_shared(element::f32, Shape{2, 1}); + + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto load2 = std::make_shared(data2); + + auto bct1 = std::make_shared(load1, load0->get_shape()); + + auto add = std::make_shared(load0, bct1); + auto mul = std::make_shared(load1, load2); + + auto store0 = std::make_shared(add); + auto store1 = std::make_shared(mul); + f = std::make_shared(NodeVector{store0, store1}, ParameterVector{data0, data1, data2}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 2}); + auto data1 = std::make_shared(element::f32, Shape{2, 1}); + auto data2 = std::make_shared(element::f32, Shape{2, 1}); + + auto load0 = std::make_shared(data0); + auto load1 = std::make_shared(data1); + auto load2 = std::make_shared(data2); + + auto bct1 = std::make_shared(load1, load0->get_shape()); + + auto add = std::make_shared(load0, bct1); + auto mul = std::make_shared(load1, load2); + + auto store0 = std::make_shared(add); + auto store1 = std::make_shared(mul); + f_ref = std::make_shared(NodeVector{store0, store1}, ParameterVector{data0, data1, data2}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git 
a/inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp b/inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp new file mode 100644 index 00000000000..3dbed7705b6 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/memory_ops.cpp @@ -0,0 +1,94 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, InsertLoadStore) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + f = std::make_shared(NodeVector{neg}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, InsertLoadTwise) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + f = std::make_shared(NodeVector{neg}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + f_ref = std::make_shared(NodeVector{neg}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) 
<< res.second; +} + +TEST(TransformationTests, InsertStoreTwise) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + f = std::make_shared(NodeVector{neg}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto neg = std::make_shared(data); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp b/inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp new file mode 100644 index 00000000000..0c7b6ad2dc7 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/movebroadcast.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, InsertBroadcastMove) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + f = std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto move = std::make_shared(data1, 
data0->output(0).get_shape()); + auto add = std::make_shared(data0, move); + f_ref = std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} diff --git a/inference-engine/tests/functional/inference_engine/snippets/registers.cpp b/inference-engine/tests/functional/inference_engine/snippets/registers.cpp new file mode 100644 index 00000000000..b4e64e0f83f --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/registers.cpp @@ -0,0 +1,137 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, AssignRegisters) { + std::shared_ptr f(nullptr); + { + auto p0 = std::make_shared(element::f32, Shape(1)); + auto p1 = std::make_shared(element::f32, Shape(1)); + auto y00 = std::make_shared(p0); y00->set_friendly_name("y00"); + auto y01 = std::make_shared(p1); y01->set_friendly_name("y01"); + auto y02 = std::make_shared(y00, y01); y02->set_friendly_name("y02"); + auto y03 = std::make_shared(y02); y03->set_friendly_name("y03"); + + f = std::make_shared(NodeVector{y03}, ParameterVector{p0, p1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + // instead of comparing to a reference function check that registers are correctly assigned + // and stored to runtime info + { + std::map ref_registers { + {"y00", 0}, + {"y01", 1}, + {"y02", 2} + }; + + auto total_ops = 0; + for (auto& op : f->get_ordered_ops()) { + auto& rt = op->get_rt_info(); + + if (auto rinfo = rt["reginfo"]) { + auto reginfo = as_type_ptr>>(rinfo)->get(); + auto reg = reginfo[0]; + ASSERT_TRUE(ref_registers[op->get_friendly_name()] == reg); + total_ops++; + } + } + 
ASSERT_EQ(total_ops, ref_registers.size()); + } +} + +TEST(TransformationTests, AssignRegisters2) { + std::shared_ptr f(nullptr); + { + auto p0 = std::make_shared(ngraph::element::f32, Shape()); + auto p1 = std::make_shared(ngraph::element::f32, Shape()); + auto p2 = std::make_shared(ngraph::element::f32, Shape()); + auto p3 = std::make_shared(ngraph::element::f32, Shape()); + auto p4 = std::make_shared(ngraph::element::f32, Shape()); + auto p5 = std::make_shared(ngraph::element::f32, Shape()); + auto p6 = std::make_shared(ngraph::element::f32, Shape()); + auto p7 = std::make_shared(ngraph::element::f32, Shape()); + + auto c0 = std::make_shared(ngraph::element::f32, Shape(), 3.14f); c0->set_friendly_name("r00"); + auto c1 = std::make_shared(ngraph::element::f32, Shape(), 6.6260701e-34f); c1->set_friendly_name("r01"); + + auto y00 = std::make_shared(p0); y00->set_friendly_name("r02"); + auto y01 = std::make_shared(p1); y01->set_friendly_name("r03"); + auto y02 = std::make_shared(y00, c0); y02->set_friendly_name("r04"); + auto y03 = std::make_shared(y01, c1); y03->set_friendly_name("r05"); + auto y04 = std::make_shared(p2); y04->set_friendly_name("r06"); + auto y05 = std::make_shared(p3); y05->set_friendly_name("r07"); + auto y06 = std::make_shared(y02, y03); y06->set_friendly_name("r08"); + auto y07 = std::make_shared(y04, c0); y07->set_friendly_name("r09"); + auto y08 = std::make_shared(y05, c1); y08->set_friendly_name("r10"); + auto y09 = std::make_shared(p4); y09->set_friendly_name("r11"); + auto y10 = std::make_shared(p5); y10->set_friendly_name("r12"); + auto y11 = std::make_shared(y07, y08); y11->set_friendly_name("r13"); + auto y12 = std::make_shared(y09, c0); y12->set_friendly_name("r14"); + auto y13 = std::make_shared(y10, c1); y13->set_friendly_name("r15"); + auto y14 = std::make_shared(p6); y14->set_friendly_name("r16"); + auto y15 = std::make_shared(y12, y13); y15->set_friendly_name("r17"); + auto y16 = std::make_shared(p7); 
y16->set_friendly_name("r18"); + auto y17 = std::make_shared(y14, c0); y17->set_friendly_name("r19"); + auto y18 = std::make_shared(y16, c1); y18->set_friendly_name("r20"); + auto y19 = std::make_shared(y06, y11); y19->set_friendly_name("r21"); + auto y20 = std::make_shared(y17, y18); y20->set_friendly_name("r22"); + auto y21 = std::make_shared(y15, y19); y21->set_friendly_name("r23"); + auto y22 = std::make_shared(y20, y21); y22->set_friendly_name("r24"); + auto y23 = std::make_shared(y22); + + f = std::make_shared(NodeVector{y23}, ParameterVector{p0, p1, p2, p3, p4, p5, p6, p7}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + // instead of comparing to a reference function check that registers are correctly assigned + // and stored to runtime info + { + std::map ref_registers { + {"r00", 1}, {"r01", 3}, {"r02", 5}, {"r03", 5}, {"r04", 2}, {"r05", 6}, {"r06", 6}, {"r07", 6}, + {"r08", 5}, {"r09", 2}, {"r10", 1}, {"r11", 4}, {"r12", 4}, {"r13", 6}, {"r14", 2}, {"r15", 5}, + {"r16", 0}, {"r17", 4}, {"r18", 0}, {"r19", 2}, {"r20", 4}, {"r21", 1}, {"r22", 0}, {"r23", 6}, + {"r24", 1} + }; + + auto total_ops = 0; + for (auto& op : f->get_ordered_ops()) { + auto& rt = op->get_rt_info(); + + if (auto rinfo = rt["reginfo"]) { + auto reginfo = as_type_ptr>>(rinfo)->get(); + auto reg = reginfo[0]; + ASSERT_TRUE(ref_registers[op->get_friendly_name()] == reg); + total_ops++; + } + } + ASSERT_EQ(total_ops, ref_registers.size()); + } +} diff --git a/inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp b/inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp new file mode 100644 index 00000000000..379343aee59 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/tokenization.cpp @@ -0,0 +1,154 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include 
+#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, StartSubgraphMultipleOutputs) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(add, sub); + f = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(add, sub); + f_ref = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, DontStartSubgraphSingleOuptut) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(data0, sub); + f = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = 
std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(data0, data1); + auto sub = std::make_shared(add, data1); + auto mul = std::make_shared(data0, sub); + f_ref = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, AttachToSubgraph) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto neg = std::make_shared(add); + auto concat = std::make_shared(NodeVector{add, neg}, 0); + f = std::make_shared(NodeVector{concat}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto inner = std::make_shared(indata0, indata1); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(inner), inner}, ParameterVector{indata0, indata1})); + auto concat = std::make_shared(OutputVector{add->output(0), add->output(1)}, 0); + f_ref = std::make_shared(NodeVector{concat}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, DontAttachToSubgraphIfLoop) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data0 = 
std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto log = std::make_shared(add); + auto mul = std::make_shared(add, log); + f = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + + { + auto data0 = std::make_shared(element::f32, Shape{2, 3}); + auto data1 = std::make_shared(element::f32, Shape{1, 3}); + auto indata0 = std::make_shared(element::f32, Shape{2, 3}); + auto indata1 = std::make_shared(element::f32, Shape{1, 3}); + auto add = std::make_shared(NodeVector{data0, data1}, + std::make_shared(NodeVector{std::make_shared(indata0, indata1)}, ParameterVector{indata0, indata1})); + auto log = std::make_shared(add); + auto mul = std::make_shared(add, log); + f_ref = std::make_shared(NodeVector{mul}, ParameterVector{data0, data1}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp b/inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp new file mode 100644 index 00000000000..eec873d6a0c --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/snippets/vector_scalar.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include + +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ngraph; + +TEST(TransformationTests, ReplaceLoadsWithScalarLoads) 
{ + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f = std::make_shared(NodeVector{store}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} + +TEST(TransformationTests, ReplaceStoresWithScalarStores) { + std::shared_ptr f(nullptr), f_ref(nullptr); + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f = std::make_shared(NodeVector{store}, ParameterVector{data}); + + pass::Manager m; + m.register_pass(); + m.register_pass(); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + } + { + auto data = std::make_shared(element::f32, Shape{2, 2}); + auto load = std::make_shared(data); + auto neg = std::make_shared(load); + auto store = std::make_shared(neg); + f_ref = std::make_shared(NodeVector{store}, ParameterVector{data}); + } + + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; +} \ No newline at end of file diff --git a/inference-engine/tests_deprecated/unit/CMakeLists.txt b/inference-engine/tests_deprecated/unit/CMakeLists.txt index 7949b0c7578..c749998f30b 100644 --- a/inference-engine/tests_deprecated/unit/CMakeLists.txt +++ b/inference-engine/tests_deprecated/unit/CMakeLists.txt @@ -137,7 +137,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE # dynamic libraries inference_engine_transformations - 
inference_engine_lp_transformations) + inference_engine_lp_transformations + inference_engine_snippets + ) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold") diff --git a/ngraph/core/src/op/non_zero.cpp b/ngraph/core/src/op/non_zero.cpp index 3c7c6cf2bd3..45b2432b623 100644 --- a/ngraph/core/src/op/non_zero.cpp +++ b/ngraph/core/src/op/non_zero.cpp @@ -158,7 +158,7 @@ namespace nonzero return rc; } - +#undef TYPE_OUT_CASE bool evaluate_nonzero(const HostTensorPtr& input, const HostTensorPtr& output) { bool rc = true; diff --git a/ngraph/core/src/op/one_hot.cpp b/ngraph/core/src/op/one_hot.cpp index c7e1db9c754..56c4f27cf85 100644 --- a/ngraph/core/src/op/one_hot.cpp +++ b/ngraph/core/src/op/one_hot.cpp @@ -178,7 +178,7 @@ namespace detail return rc; } - +#undef TYPE_OUT_CASE bool evaluate_onehot(const HostTensorVector& output_values, const HostTensorVector& input_values, const int64_t axis) diff --git a/ngraph/core/src/op/util/op_types.cpp b/ngraph/core/src/op/util/op_types.cpp index 966da266f81..eabdef5221c 100644 --- a/ngraph/core/src/op/util/op_types.cpp +++ b/ngraph/core/src/op/util/op_types.cpp @@ -27,6 +27,7 @@ #include "ngraph/op/parameter.hpp" #include "ngraph/op/result.hpp" #include "ngraph/op/select.hpp" +#include "ngraph/op/squared_difference.hpp" #include "ngraph/op/util/binary_elementwise_arithmetic.hpp" #include "ngraph/op/util/binary_elementwise_comparison.hpp" #include "ngraph/op/util/binary_elementwise_logical.hpp" @@ -60,6 +61,7 @@ bool ngraph::op::is_binary_elementwise_logical(const ngraph::Node* node) bool ngraph::op::supports_auto_broadcast(const ngraph::Node* node) { return dynamic_cast(node) != nullptr || + dynamic_cast(node) != nullptr || dynamic_cast(node) != nullptr || dynamic_cast(node) != nullptr || dynamic_cast(node) != nullptr;